Skip to content

Commit 80d4224

Browse files
Adding transport version change
1 parent fca2543 commit 80d4224

20 files changed: +273 -147 lines changed

server/src/main/java/org/elasticsearch/TransportVersions.java

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -357,6 +357,7 @@ static TransportVersion def(int id) {
357357
public static final TransportVersion ESQL_SAMPLE_OPERATOR_STATUS = def(9_127_0_00);
358358
public static final TransportVersion ALLOCATION_DECISION_NOT_PREFERRED = def(9_145_0_00);
359359
public static final TransportVersion ESQL_QUALIFIERS_IN_ATTRIBUTES = def(9_146_0_00);
360+
public static final TransportVersion INFERENCE_API_DISABLE_EIS_RATE_LIMITING = def(9_147_0_00);
360361

361362
/*
362363
* STOP! READ THIS FIRST! No, really,

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceService.java

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,7 @@ private static Map<String, DefaultModelConfig> initDefaultEndpoints(
193193
DEFAULT_CHAT_COMPLETION_ENDPOINT_ID_V1,
194194
TaskType.CHAT_COMPLETION,
195195
NAME,
196-
new ElasticInferenceServiceCompletionServiceSettings(DEFAULT_CHAT_COMPLETION_MODEL_ID_V1, null),
196+
new ElasticInferenceServiceCompletionServiceSettings(DEFAULT_CHAT_COMPLETION_MODEL_ID_V1),
197197
EmptyTaskSettings.INSTANCE,
198198
EmptySecretSettings.INSTANCE,
199199
elasticInferenceServiceComponents
@@ -206,7 +206,7 @@ private static Map<String, DefaultModelConfig> initDefaultEndpoints(
206206
DEFAULT_ELSER_ENDPOINT_ID_V2,
207207
TaskType.SPARSE_EMBEDDING,
208208
NAME,
209-
new ElasticInferenceServiceSparseEmbeddingsServiceSettings(DEFAULT_ELSER_2_MODEL_ID, null, null),
209+
new ElasticInferenceServiceSparseEmbeddingsServiceSettings(DEFAULT_ELSER_2_MODEL_ID, null),
210210
EmptyTaskSettings.INSTANCE,
211211
EmptySecretSettings.INSTANCE,
212212
elasticInferenceServiceComponents,
@@ -224,8 +224,7 @@ private static Map<String, DefaultModelConfig> initDefaultEndpoints(
224224
DEFAULT_MULTILINGUAL_EMBED_MODEL_ID,
225225
defaultDenseTextEmbeddingsSimilarity(),
226226
null,
227-
null,
228-
ElasticInferenceServiceDenseTextEmbeddingsServiceSettings.DEFAULT_RATE_LIMIT_SETTINGS
227+
null
229228
),
230229
EmptyTaskSettings.INSTANCE,
231230
EmptySecretSettings.INSTANCE,
@@ -245,7 +244,7 @@ private static Map<String, DefaultModelConfig> initDefaultEndpoints(
245244
DEFAULT_RERANK_ENDPOINT_ID_V1,
246245
TaskType.RERANK,
247246
NAME,
248-
new ElasticInferenceServiceRerankServiceSettings(DEFAULT_RERANK_MODEL_ID_V1, null),
247+
new ElasticInferenceServiceRerankServiceSettings(DEFAULT_RERANK_MODEL_ID_V1),
249248
EmptyTaskSettings.INSTANCE,
250249
EmptySecretSettings.INSTANCE,
251250
elasticInferenceServiceComponents
@@ -622,8 +621,7 @@ public Model updateModelWithEmbeddingDetails(Model model, int embeddingSize) {
622621
modelId,
623622
similarityToUse,
624623
embeddingSize,
625-
maxInputTokens,
626-
serviceSettings.rateLimitSettings()
624+
maxInputTokens
627625
);
628626

629627
return new ElasticInferenceServiceDenseTextEmbeddingsModel(embeddingsModel, updateServiceSettings);

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/completion/ElasticInferenceServiceCompletionModel.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,7 @@ public static ElasticInferenceServiceCompletionModel of(
3535
) {
3636
var originalModelServiceSettings = model.getServiceSettings();
3737
var overriddenServiceSettings = new ElasticInferenceServiceCompletionServiceSettings(
38-
Objects.requireNonNullElse(request.model(), originalModelServiceSettings.modelId()),
39-
originalModelServiceSettings.rateLimitSettings()
38+
Objects.requireNonNullElse(request.model(), originalModelServiceSettings.modelId())
4039
);
4140

4241
return new ElasticInferenceServiceCompletionModel(model, overriddenServiceSettings);

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/completion/ElasticInferenceServiceCompletionServiceSettings.java

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -34,32 +34,33 @@ public class ElasticInferenceServiceCompletionServiceSettings extends FilteredXC
3434

3535
public static final String NAME = "elastic_inference_service_completion_service_settings";
3636

37-
private static final RateLimitSettings DEFAULT_RATE_LIMIT_SETTINGS = new RateLimitSettings(720L);
38-
3937
public static ElasticInferenceServiceCompletionServiceSettings fromMap(Map<String, Object> map, ConfigurationParseContext context) {
4038
ValidationException validationException = new ValidationException();
4139

4240
String modelId = extractRequiredString(map, MODEL_ID, ModelConfigurations.SERVICE_SETTINGS, validationException);
43-
RateLimitSettings rateLimitSettings = RateLimitSettings.disabledRateLimiting(map);
41+
RateLimitSettings.disabledRateLimiting(map);
4442

4543
if (validationException.validationErrors().isEmpty() == false) {
4644
throw validationException;
4745
}
4846

49-
return new ElasticInferenceServiceCompletionServiceSettings(modelId, rateLimitSettings);
47+
return new ElasticInferenceServiceCompletionServiceSettings(modelId);
5048
}
5149

5250
private final String modelId;
5351
private final RateLimitSettings rateLimitSettings;
5452

55-
public ElasticInferenceServiceCompletionServiceSettings(String modelId, RateLimitSettings rateLimitSettings) {
53+
public ElasticInferenceServiceCompletionServiceSettings(String modelId) {
5654
this.modelId = Objects.requireNonNull(modelId);
57-
this.rateLimitSettings = Objects.requireNonNullElse(rateLimitSettings, DEFAULT_RATE_LIMIT_SETTINGS);
55+
this.rateLimitSettings = RateLimitSettings.DISABLED_INSTANCE;
5856
}
5957

6058
public ElasticInferenceServiceCompletionServiceSettings(StreamInput in) throws IOException {
6159
this.modelId = in.readString();
62-
this.rateLimitSettings = new RateLimitSettings(in);
60+
this.rateLimitSettings = RateLimitSettings.DISABLED_INSTANCE;
61+
if (in.getTransportVersion().before(TransportVersions.INFERENCE_API_DISABLE_EIS_RATE_LIMITING)) {
62+
new RateLimitSettings(in);
63+
}
6364
}
6465

6566
@Override
@@ -103,7 +104,9 @@ protected XContentBuilder toXContentFragmentOfExposedFields(XContentBuilder buil
103104
@Override
104105
public void writeTo(StreamOutput out) throws IOException {
105106
out.writeString(modelId);
106-
rateLimitSettings.writeTo(out);
107+
if (out.getTransportVersion().before(TransportVersions.INFERENCE_API_DISABLE_EIS_RATE_LIMITING)) {
108+
rateLimitSettings.writeTo(out);
109+
}
107110
}
108111

109112
@Override

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/densetextembeddings/ElasticInferenceServiceDenseTextEmbeddingsServiceSettings.java

Lines changed: 14 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
import org.elasticsearch.inference.SimilarityMeasure;
2020
import org.elasticsearch.xcontent.XContentBuilder;
2121
import org.elasticsearch.xpack.inference.services.ConfigurationParseContext;
22-
import org.elasticsearch.xpack.inference.services.elastic.ElasticInferenceService;
2322
import org.elasticsearch.xpack.inference.services.elastic.ElasticInferenceServiceRateLimitServiceSettings;
2423
import org.elasticsearch.xpack.inference.services.settings.FilteredXContentObject;
2524
import org.elasticsearch.xpack.inference.services.settings.RateLimitSettings;
@@ -43,8 +42,6 @@ public class ElasticInferenceServiceDenseTextEmbeddingsServiceSettings extends F
4342

4443
public static final String NAME = "elastic_inference_service_dense_embeddings_service_settings";
4544

46-
public static final RateLimitSettings DEFAULT_RATE_LIMIT_SETTINGS = new RateLimitSettings(10_000);
47-
4845
private final String modelId;
4946
private final SimilarityMeasure similarity;
5047
private final Integer dimensions;
@@ -68,13 +65,7 @@ private static ElasticInferenceServiceDenseTextEmbeddingsServiceSettings fromReq
6865
ValidationException validationException = new ValidationException();
6966

7067
String modelId = extractRequiredString(map, MODEL_ID, ModelConfigurations.SERVICE_SETTINGS, validationException);
71-
RateLimitSettings rateLimitSettings = RateLimitSettings.of(
72-
map,
73-
DEFAULT_RATE_LIMIT_SETTINGS,
74-
validationException,
75-
ElasticInferenceService.NAME,
76-
context
77-
);
68+
RateLimitSettings.disabledRateLimiting(map);
7869

7970
SimilarityMeasure similarity = extractSimilarity(map, ModelConfigurations.SERVICE_SETTINGS, validationException);
8071
Integer dims = removeAsType(map, DIMENSIONS, Integer.class);
@@ -84,7 +75,7 @@ private static ElasticInferenceServiceDenseTextEmbeddingsServiceSettings fromReq
8475
throw validationException;
8576
}
8677

87-
return new ElasticInferenceServiceDenseTextEmbeddingsServiceSettings(modelId, similarity, dims, maxInputTokens, rateLimitSettings);
78+
return new ElasticInferenceServiceDenseTextEmbeddingsServiceSettings(modelId, similarity, dims, maxInputTokens);
8879
}
8980

9081
private static ElasticInferenceServiceDenseTextEmbeddingsServiceSettings fromPersistentMap(
@@ -94,13 +85,7 @@ private static ElasticInferenceServiceDenseTextEmbeddingsServiceSettings fromPer
9485
ValidationException validationException = new ValidationException();
9586

9687
String modelId = extractRequiredString(map, MODEL_ID, ModelConfigurations.SERVICE_SETTINGS, validationException);
97-
RateLimitSettings rateLimitSettings = RateLimitSettings.of(
98-
map,
99-
DEFAULT_RATE_LIMIT_SETTINGS,
100-
validationException,
101-
ElasticInferenceService.NAME,
102-
context
103-
);
88+
RateLimitSettings.disabledRateLimiting(map);
10489

10590
SimilarityMeasure similarity = extractSimilarity(map, ModelConfigurations.SERVICE_SETTINGS, validationException);
10691
Integer dims = removeAsType(map, DIMENSIONS, Integer.class);
@@ -110,29 +95,32 @@ private static ElasticInferenceServiceDenseTextEmbeddingsServiceSettings fromPer
11095
throw validationException;
11196
}
11297

113-
return new ElasticInferenceServiceDenseTextEmbeddingsServiceSettings(modelId, similarity, dims, maxInputTokens, rateLimitSettings);
98+
return new ElasticInferenceServiceDenseTextEmbeddingsServiceSettings(modelId, similarity, dims, maxInputTokens);
11499
}
115100

116101
public ElasticInferenceServiceDenseTextEmbeddingsServiceSettings(
117102
String modelId,
118103
@Nullable SimilarityMeasure similarity,
119104
@Nullable Integer dimensions,
120-
@Nullable Integer maxInputTokens,
121-
RateLimitSettings rateLimitSettings
105+
@Nullable Integer maxInputTokens
122106
) {
123107
this.modelId = modelId;
124108
this.similarity = similarity;
125109
this.dimensions = dimensions;
126110
this.maxInputTokens = maxInputTokens;
127-
this.rateLimitSettings = Objects.requireNonNullElse(rateLimitSettings, DEFAULT_RATE_LIMIT_SETTINGS);
111+
this.rateLimitSettings = RateLimitSettings.DISABLED_INSTANCE;
128112
}
129113

130114
public ElasticInferenceServiceDenseTextEmbeddingsServiceSettings(StreamInput in) throws IOException {
131115
this.modelId = in.readString();
132116
this.similarity = in.readOptionalEnum(SimilarityMeasure.class);
133117
this.dimensions = in.readOptionalVInt();
134118
this.maxInputTokens = in.readOptionalVInt();
135-
this.rateLimitSettings = new RateLimitSettings(in);
119+
this.rateLimitSettings = RateLimitSettings.DISABLED_INSTANCE;
120+
121+
if (in.getTransportVersion().before(TransportVersions.INFERENCE_API_DISABLE_EIS_RATE_LIMITING)) {
122+
new RateLimitSettings(in);
123+
}
136124
}
137125

138126
@Override
@@ -221,7 +209,9 @@ public void writeTo(StreamOutput out) throws IOException {
221209
out.writeOptionalEnum(SimilarityMeasure.translateSimilarity(similarity, out.getTransportVersion()));
222210
out.writeOptionalVInt(dimensions);
223211
out.writeOptionalVInt(maxInputTokens);
224-
rateLimitSettings.writeTo(out);
212+
if (out.getTransportVersion().before(TransportVersions.INFERENCE_API_DISABLE_EIS_RATE_LIMITING)) {
213+
rateLimitSettings.writeTo(out);
214+
}
225215
}
226216

227217
@Override

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/rerank/ElasticInferenceServiceRerankServiceSettings.java

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -34,29 +34,30 @@ public class ElasticInferenceServiceRerankServiceSettings extends FilteredXConte
3434

3535
public static final String NAME = "elastic_rerank_service_settings";
3636

37-
private static final RateLimitSettings DEFAULT_RATE_LIMIT_SETTINGS = new RateLimitSettings(500);
38-
3937
public static ElasticInferenceServiceRerankServiceSettings fromMap(Map<String, Object> map, ConfigurationParseContext context) {
4038
ValidationException validationException = new ValidationException();
4139

4240
String modelId = extractRequiredString(map, MODEL_ID, ModelConfigurations.SERVICE_SETTINGS, validationException);
43-
RateLimitSettings rateLimitSettings = RateLimitSettings.disabledRateLimiting(map);
41+
RateLimitSettings.disabledRateLimiting(map);
4442

45-
return new ElasticInferenceServiceRerankServiceSettings(modelId, rateLimitSettings);
43+
return new ElasticInferenceServiceRerankServiceSettings(modelId);
4644
}
4745

4846
private final String modelId;
4947

5048
private final RateLimitSettings rateLimitSettings;
5149

52-
public ElasticInferenceServiceRerankServiceSettings(String modelId, RateLimitSettings rateLimitSettings) {
50+
public ElasticInferenceServiceRerankServiceSettings(String modelId) {
5351
this.modelId = Objects.requireNonNull(modelId);
54-
this.rateLimitSettings = Objects.requireNonNullElse(rateLimitSettings, DEFAULT_RATE_LIMIT_SETTINGS);
52+
this.rateLimitSettings = RateLimitSettings.DISABLED_INSTANCE;
5553
}
5654

5755
public ElasticInferenceServiceRerankServiceSettings(StreamInput in) throws IOException {
5856
this.modelId = in.readString();
59-
this.rateLimitSettings = new RateLimitSettings(in);
57+
this.rateLimitSettings = RateLimitSettings.DISABLED_INSTANCE;
58+
if (in.getTransportVersion().before(TransportVersions.INFERENCE_API_DISABLE_EIS_RATE_LIMITING)) {
59+
new RateLimitSettings(in);
60+
}
6061
}
6162

6263
@Override
@@ -108,7 +109,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
108109
@Override
109110
public void writeTo(StreamOutput out) throws IOException {
110111
out.writeString(modelId);
111-
rateLimitSettings.writeTo(out);
112+
if (out.getTransportVersion().before(TransportVersions.INFERENCE_API_DISABLE_EIS_RATE_LIMITING)) {
113+
rateLimitSettings.writeTo(out);
114+
}
112115
}
113116

114117
@Override

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/sparseembeddings/ElasticInferenceServiceSparseEmbeddingsServiceSettings.java

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,6 @@ public class ElasticInferenceServiceSparseEmbeddingsServiceSettings extends Filt
3737

3838
public static final String NAME = "elastic_inference_service_sparse_embeddings_service_settings";
3939

40-
private static final RateLimitSettings DEFAULT_RATE_LIMIT_SETTINGS = new RateLimitSettings(1_000);
41-
4240
public static ElasticInferenceServiceSparseEmbeddingsServiceSettings fromMap(
4341
Map<String, Object> map,
4442
ConfigurationParseContext context
@@ -53,34 +51,33 @@ public static ElasticInferenceServiceSparseEmbeddingsServiceSettings fromMap(
5351
validationException
5452
);
5553

56-
RateLimitSettings rateLimitSettings = RateLimitSettings.disabledRateLimiting(map);
54+
RateLimitSettings.disabledRateLimiting(map);
5755

5856
if (validationException.validationErrors().isEmpty() == false) {
5957
throw validationException;
6058
}
6159

62-
return new ElasticInferenceServiceSparseEmbeddingsServiceSettings(modelId, maxInputTokens, rateLimitSettings);
60+
return new ElasticInferenceServiceSparseEmbeddingsServiceSettings(modelId, maxInputTokens);
6361
}
6462

6563
private final String modelId;
6664

6765
private final Integer maxInputTokens;
6866
private final RateLimitSettings rateLimitSettings;
6967

70-
public ElasticInferenceServiceSparseEmbeddingsServiceSettings(
71-
String modelId,
72-
@Nullable Integer maxInputTokens,
73-
@Nullable RateLimitSettings rateLimitSettings
74-
) {
68+
public ElasticInferenceServiceSparseEmbeddingsServiceSettings(String modelId, @Nullable Integer maxInputTokens) {
7569
this.modelId = Objects.requireNonNull(modelId);
7670
this.maxInputTokens = maxInputTokens;
77-
this.rateLimitSettings = Objects.requireNonNullElse(rateLimitSettings, DEFAULT_RATE_LIMIT_SETTINGS);
71+
this.rateLimitSettings = RateLimitSettings.DISABLED_INSTANCE;
7872
}
7973

8074
public ElasticInferenceServiceSparseEmbeddingsServiceSettings(StreamInput in) throws IOException {
8175
this.modelId = in.readString();
8276
this.maxInputTokens = in.readOptionalVInt();
83-
this.rateLimitSettings = new RateLimitSettings(in);
77+
this.rateLimitSettings = RateLimitSettings.DISABLED_INSTANCE;
78+
if (in.getTransportVersion().before(TransportVersions.INFERENCE_API_DISABLE_EIS_RATE_LIMITING)) {
79+
new RateLimitSettings(in);
80+
}
8481
}
8582

8683
@Override
@@ -132,7 +129,9 @@ protected XContentBuilder toXContentFragmentOfExposedFields(XContentBuilder buil
132129
public void writeTo(StreamOutput out) throws IOException {
133130
out.writeString(modelId);
134131
out.writeOptionalVInt(maxInputTokens);
135-
rateLimitSettings.writeTo(out);
132+
if (out.getTransportVersion().before(TransportVersions.INFERENCE_API_DISABLE_EIS_RATE_LIMITING)) {
133+
rateLimitSettings.writeTo(out);
134+
}
136135
}
137136

138137
@Override

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/settings/RateLimitSettings.java

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
package org.elasticsearch.xpack.inference.services.settings;
99

10+
import org.elasticsearch.TransportVersions;
1011
import org.elasticsearch.common.ValidationException;
1112
import org.elasticsearch.common.io.stream.StreamInput;
1213
import org.elasticsearch.common.io.stream.StreamOutput;
@@ -33,6 +34,7 @@
3334
public class RateLimitSettings implements Writeable, ToXContentFragment {
3435
public static final String FIELD_NAME = "rate_limit";
3536
public static final String REQUESTS_PER_MINUTE_FIELD = "requests_per_minute";
37+
public static final RateLimitSettings DISABLED_INSTANCE = new RateLimitSettings(1, TimeUnit.MINUTES, false);
3638

3739
public static RateLimitSettings of(
3840
Map<String, Object> map,
@@ -54,7 +56,7 @@ public static RateLimitSettings of(
5456
public static RateLimitSettings disabledRateLimiting(Map<String, Object> map) {
5557
removeFromMap(map, FIELD_NAME);
5658

57-
return new RateLimitSettings(1, TimeUnit.MINUTES, false);
59+
return DISABLED_INSTANCE;
5860
}
5961

6062
public static Map<String, SettingsConfiguration> toSettingsConfigurationWithDescription(
@@ -109,15 +111,19 @@ public RateLimitSettings(long requestsPerTimeUnit, TimeUnit timeUnit) {
109111
if (requestsPerTimeUnit <= 0) {
110112
throw new IllegalArgumentException("requests per minute must be positive");
111113
}
112-
this.requestsPerTimeUnit = 0;
114+
this.requestsPerTimeUnit = requestsPerTimeUnit;
113115
this.timeUnit = Objects.requireNonNull(timeUnit);
114116
this.enabled = enabled;
115117
}
116118

117119
public RateLimitSettings(StreamInput in) throws IOException {
118120
requestsPerTimeUnit = in.readVLong();
119121
timeUnit = TimeUnit.MINUTES;
120-
enabled = true;
122+
if (in.getTransportVersion().onOrAfter(TransportVersions.INFERENCE_API_DISABLE_EIS_RATE_LIMITING)) {
123+
enabled = in.readBoolean();
124+
} else {
125+
enabled = true;
126+
}
121127
}
122128

123129
public long requestsPerTimeUnit() {
@@ -147,6 +153,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
147153
@Override
148154
public void writeTo(StreamOutput out) throws IOException {
149155
out.writeVLong(requestsPerTimeUnit);
156+
if (out.getTransportVersion().onOrAfter(TransportVersions.INFERENCE_API_DISABLE_EIS_RATE_LIMITING)) {
157+
out.writeBoolean(enabled);
158+
}
150159
}
151160

152161
@Override

0 commit comments

Comments
 (0)