Skip to content

Commit d0e3b71

Browse files
committed
Merge remote-tracking branch 'upstream/main' into add_source_context
2 parents 5b7b7d5 + fabf32c commit d0e3b71

23 files changed

+668
-188
lines changed

TESTING.asciidoc

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -681,12 +681,15 @@ There are multiple base classes for tests:
681681
directly by unit tests.
682682
* **`ESSingleNodeTestCase`**: This test case sets up a cluster that has a
683683
single node.
684-
* **`ESIntegTestCase`**: An integration test case that creates a cluster that
685-
might have multiple nodes.
686-
* **`ESRestTestCase`**: An integration tests that interacts with an external
687-
cluster via the REST API. This is used for Java based REST tests.
688-
* **`ESClientYamlSuiteTestCase` **: A subclass of `ESRestTestCase` used to run
689-
YAML based REST tests.
684+
* **`ESIntegTestCase`**: An internal integration test that starts nodes within the same JVM as the test.
685+
These tests allow you to test functionality that is not exposed via the REST API, or to verify a certain internal state.
686+
Additionally, you can easily simulate tricky distributed setups that are difficult to reproduce in REST tests.
687+
If you only need to start one node, use `ESSingleNodeTestCase` instead, which is a much lighter test setup.
688+
* **`ESRestTestCase`**: An integration test that interacts with an external
689+
cluster via the REST API. This is used for Java-based REST tests. This should
690+
be the first choice for writing integration tests as these tests run in a much more
691+
realistic setup.
692+
* **`ESClientYamlSuiteTestCase`**: A subclass of `ESRestTestCase` used to run YAML-based REST tests.
690693

691694
=== Good practices
692695

muted-tests.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -528,6 +528,9 @@ tests:
528528
- class: org.elasticsearch.xpack.cluster.routing.allocation.DataTierAllocationDeciderIT
529529
method: testShardsAreKeptInPreferredTierUntilTheNextTierIsInItsFinalState
530530
issue: https://github.com/elastic/elasticsearch/issues/134050
531+
- class: org.elasticsearch.cluster.ClusterInfoServiceIT
532+
method: testMaxQueueLatenciesInClusterInfo
533+
issue: https://github.com/elastic/elasticsearch/issues/134088
531534

532535
# Examples:
533536
#

server/src/main/java/org/elasticsearch/TransportVersions.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -358,7 +358,8 @@ static TransportVersion def(int id) {
358358
public static final TransportVersion RESOLVE_INDEX_MODE_FILTER = def(9_149_0_00);
359359
public static final TransportVersion SEMANTIC_QUERY_MULTIPLE_INFERENCE_IDS = def(9_150_0_00);
360360
public static final TransportVersion ESQL_LOOKUP_JOIN_PRE_JOIN_FILTER = def(9_151_0_00);
361-
public static final TransportVersion SOURCE_CONTEXT = def(9_152_0_00);
361+
public static final TransportVersion INFERENCE_API_DISABLE_EIS_RATE_LIMITING = def(9_152_0_00);
362+
public static final TransportVersion SOURCE_CONTEXT = def(9_153_0_00);
362363

363364
/*
364365
* STOP! READ THIS FIRST! No, really,

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/sender/RequestExecutorService.java

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -449,9 +449,11 @@ public synchronized TimeValue executeEnqueuedTask() {
449449
}
450450

451451
private TimeValue executeEnqueuedTaskInternal() {
452-
var timeBeforeAvailableToken = rateLimiter.timeToReserve(1);
453-
if (shouldExecuteImmediately(timeBeforeAvailableToken) == false) {
454-
return timeBeforeAvailableToken;
452+
if (rateLimitSettings.isEnabled()) {
453+
var timeBeforeAvailableToken = rateLimiter.timeToReserve(1);
454+
if (shouldExecuteImmediately(timeBeforeAvailableToken) == false) {
455+
return timeBeforeAvailableToken;
456+
}
455457
}
456458

457459
var task = queue.poll();
@@ -463,9 +465,11 @@ private TimeValue executeEnqueuedTaskInternal() {
463465
return NO_TASKS_AVAILABLE;
464466
}
465467

466-
// We should never have to wait because we checked above
467-
var reserveRes = rateLimiter.reserve(1);
468-
assert shouldExecuteImmediately(reserveRes) : "Reserving request tokens required a sleep when it should not have";
468+
if (rateLimitSettings.isEnabled()) {
469+
// We should never have to wait because we checked above
470+
var reserveRes = rateLimiter.reserve(1);
471+
assert shouldExecuteImmediately(reserveRes) : "Reserving request tokens required a sleep when it should not have";
472+
}
469473

470474
task.getRequestManager()
471475
.execute(task.getInferenceInputs(), requestSender, task.getRequestCompletedFunction(), task.getListener());

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceService.java

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,7 @@ private static Map<String, DefaultModelConfig> initDefaultEndpoints(
193193
DEFAULT_CHAT_COMPLETION_ENDPOINT_ID_V1,
194194
TaskType.CHAT_COMPLETION,
195195
NAME,
196-
new ElasticInferenceServiceCompletionServiceSettings(DEFAULT_CHAT_COMPLETION_MODEL_ID_V1, null),
196+
new ElasticInferenceServiceCompletionServiceSettings(DEFAULT_CHAT_COMPLETION_MODEL_ID_V1),
197197
EmptyTaskSettings.INSTANCE,
198198
EmptySecretSettings.INSTANCE,
199199
elasticInferenceServiceComponents
@@ -206,7 +206,7 @@ private static Map<String, DefaultModelConfig> initDefaultEndpoints(
206206
DEFAULT_ELSER_ENDPOINT_ID_V2,
207207
TaskType.SPARSE_EMBEDDING,
208208
NAME,
209-
new ElasticInferenceServiceSparseEmbeddingsServiceSettings(DEFAULT_ELSER_2_MODEL_ID, null, null),
209+
new ElasticInferenceServiceSparseEmbeddingsServiceSettings(DEFAULT_ELSER_2_MODEL_ID, null),
210210
EmptyTaskSettings.INSTANCE,
211211
EmptySecretSettings.INSTANCE,
212212
elasticInferenceServiceComponents,
@@ -224,8 +224,7 @@ private static Map<String, DefaultModelConfig> initDefaultEndpoints(
224224
DEFAULT_MULTILINGUAL_EMBED_MODEL_ID,
225225
defaultDenseTextEmbeddingsSimilarity(),
226226
null,
227-
null,
228-
ElasticInferenceServiceDenseTextEmbeddingsServiceSettings.DEFAULT_RATE_LIMIT_SETTINGS
227+
null
229228
),
230229
EmptyTaskSettings.INSTANCE,
231230
EmptySecretSettings.INSTANCE,
@@ -245,7 +244,7 @@ private static Map<String, DefaultModelConfig> initDefaultEndpoints(
245244
DEFAULT_RERANK_ENDPOINT_ID_V1,
246245
TaskType.RERANK,
247246
NAME,
248-
new ElasticInferenceServiceRerankServiceSettings(DEFAULT_RERANK_MODEL_ID_V1, null),
247+
new ElasticInferenceServiceRerankServiceSettings(DEFAULT_RERANK_MODEL_ID_V1),
249248
EmptyTaskSettings.INSTANCE,
250249
EmptySecretSettings.INSTANCE,
251250
elasticInferenceServiceComponents
@@ -622,8 +621,7 @@ public Model updateModelWithEmbeddingDetails(Model model, int embeddingSize) {
622621
modelId,
623622
similarityToUse,
624623
embeddingSize,
625-
maxInputTokens,
626-
serviceSettings.rateLimitSettings()
624+
maxInputTokens
627625
);
628626

629627
return new ElasticInferenceServiceDenseTextEmbeddingsModel(embeddingsModel, updateServiceSettings);

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/completion/ElasticInferenceServiceCompletionModel.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,7 @@ public static ElasticInferenceServiceCompletionModel of(
3535
) {
3636
var originalModelServiceSettings = model.getServiceSettings();
3737
var overriddenServiceSettings = new ElasticInferenceServiceCompletionServiceSettings(
38-
Objects.requireNonNullElse(request.model(), originalModelServiceSettings.modelId()),
39-
originalModelServiceSettings.rateLimitSettings()
38+
Objects.requireNonNullElse(request.model(), originalModelServiceSettings.modelId())
4039
);
4140

4241
return new ElasticInferenceServiceCompletionModel(model, overriddenServiceSettings);

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/completion/ElasticInferenceServiceCompletionServiceSettings.java

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
import org.elasticsearch.common.io.stream.StreamOutput;
1515
import org.elasticsearch.inference.ModelConfigurations;
1616
import org.elasticsearch.inference.ServiceSettings;
17+
import org.elasticsearch.inference.TaskType;
1718
import org.elasticsearch.xcontent.XContentBuilder;
1819
import org.elasticsearch.xpack.inference.services.ConfigurationParseContext;
1920
import org.elasticsearch.xpack.inference.services.elastic.ElasticInferenceService;
@@ -35,38 +36,41 @@ public class ElasticInferenceServiceCompletionServiceSettings extends FilteredXC
3536

3637
public static final String NAME = "elastic_inference_service_completion_service_settings";
3738

38-
private static final RateLimitSettings DEFAULT_RATE_LIMIT_SETTINGS = new RateLimitSettings(720L);
39-
4039
public static ElasticInferenceServiceCompletionServiceSettings fromMap(Map<String, Object> map, ConfigurationParseContext context) {
4140
ValidationException validationException = new ValidationException();
4241

4342
String modelId = extractRequiredString(map, MODEL_ID, ModelConfigurations.SERVICE_SETTINGS, validationException);
44-
RateLimitSettings rateLimitSettings = RateLimitSettings.of(
43+
44+
RateLimitSettings.rejectRateLimitFieldForRequestContext(
4545
map,
46-
DEFAULT_RATE_LIMIT_SETTINGS,
47-
validationException,
46+
ModelConfigurations.SERVICE_SETTINGS,
4847
ElasticInferenceService.NAME,
49-
context
48+
TaskType.CHAT_COMPLETION,
49+
context,
50+
validationException
5051
);
5152

5253
if (validationException.validationErrors().isEmpty() == false) {
5354
throw validationException;
5455
}
5556

56-
return new ElasticInferenceServiceCompletionServiceSettings(modelId, rateLimitSettings);
57+
return new ElasticInferenceServiceCompletionServiceSettings(modelId);
5758
}
5859

5960
private final String modelId;
6061
private final RateLimitSettings rateLimitSettings;
6162

62-
public ElasticInferenceServiceCompletionServiceSettings(String modelId, RateLimitSettings rateLimitSettings) {
63+
public ElasticInferenceServiceCompletionServiceSettings(String modelId) {
6364
this.modelId = Objects.requireNonNull(modelId);
64-
this.rateLimitSettings = Objects.requireNonNullElse(rateLimitSettings, DEFAULT_RATE_LIMIT_SETTINGS);
65+
this.rateLimitSettings = RateLimitSettings.DISABLED_INSTANCE;
6566
}
6667

6768
public ElasticInferenceServiceCompletionServiceSettings(StreamInput in) throws IOException {
6869
this.modelId = in.readString();
69-
this.rateLimitSettings = new RateLimitSettings(in);
70+
this.rateLimitSettings = RateLimitSettings.DISABLED_INSTANCE;
71+
if (in.getTransportVersion().before(TransportVersions.INFERENCE_API_DISABLE_EIS_RATE_LIMITING)) {
72+
new RateLimitSettings(in);
73+
}
7074
}
7175

7276
@Override
@@ -110,7 +114,9 @@ protected XContentBuilder toXContentFragmentOfExposedFields(XContentBuilder buil
110114
@Override
111115
public void writeTo(StreamOutput out) throws IOException {
112116
out.writeString(modelId);
113-
rateLimitSettings.writeTo(out);
117+
if (out.getTransportVersion().before(TransportVersions.INFERENCE_API_DISABLE_EIS_RATE_LIMITING)) {
118+
rateLimitSettings.writeTo(out);
119+
}
114120
}
115121

116122
@Override

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/densetextembeddings/ElasticInferenceServiceDenseTextEmbeddingsServiceSettings.java

Lines changed: 17 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import org.elasticsearch.inference.ModelConfigurations;
1818
import org.elasticsearch.inference.ServiceSettings;
1919
import org.elasticsearch.inference.SimilarityMeasure;
20+
import org.elasticsearch.inference.TaskType;
2021
import org.elasticsearch.xcontent.XContentBuilder;
2122
import org.elasticsearch.xpack.inference.services.ConfigurationParseContext;
2223
import org.elasticsearch.xpack.inference.services.elastic.ElasticInferenceService;
@@ -43,8 +44,6 @@ public class ElasticInferenceServiceDenseTextEmbeddingsServiceSettings extends F
4344

4445
public static final String NAME = "elastic_inference_service_dense_embeddings_service_settings";
4546

46-
public static final RateLimitSettings DEFAULT_RATE_LIMIT_SETTINGS = new RateLimitSettings(10_000);
47-
4847
private final String modelId;
4948
private final SimilarityMeasure similarity;
5049
private final Integer dimensions;
@@ -54,85 +53,53 @@ public class ElasticInferenceServiceDenseTextEmbeddingsServiceSettings extends F
5453
public static ElasticInferenceServiceDenseTextEmbeddingsServiceSettings fromMap(
5554
Map<String, Object> map,
5655
ConfigurationParseContext context
57-
) {
58-
return switch (context) {
59-
case REQUEST -> fromRequestMap(map, context);
60-
case PERSISTENT -> fromPersistentMap(map, context);
61-
};
62-
}
63-
64-
private static ElasticInferenceServiceDenseTextEmbeddingsServiceSettings fromRequestMap(
65-
Map<String, Object> map,
66-
ConfigurationParseContext context
6756
) {
6857
ValidationException validationException = new ValidationException();
6958

7059
String modelId = extractRequiredString(map, MODEL_ID, ModelConfigurations.SERVICE_SETTINGS, validationException);
71-
RateLimitSettings rateLimitSettings = RateLimitSettings.of(
72-
map,
73-
DEFAULT_RATE_LIMIT_SETTINGS,
74-
validationException,
75-
ElasticInferenceService.NAME,
76-
context
77-
);
78-
7960
SimilarityMeasure similarity = extractSimilarity(map, ModelConfigurations.SERVICE_SETTINGS, validationException);
8061
Integer dims = removeAsType(map, DIMENSIONS, Integer.class);
8162
Integer maxInputTokens = removeAsType(map, MAX_INPUT_TOKENS, Integer.class);
8263

83-
if (validationException.validationErrors().isEmpty() == false) {
84-
throw validationException;
85-
}
86-
87-
return new ElasticInferenceServiceDenseTextEmbeddingsServiceSettings(modelId, similarity, dims, maxInputTokens, rateLimitSettings);
88-
}
89-
90-
private static ElasticInferenceServiceDenseTextEmbeddingsServiceSettings fromPersistentMap(
91-
Map<String, Object> map,
92-
ConfigurationParseContext context
93-
) {
94-
ValidationException validationException = new ValidationException();
95-
96-
String modelId = extractRequiredString(map, MODEL_ID, ModelConfigurations.SERVICE_SETTINGS, validationException);
97-
RateLimitSettings rateLimitSettings = RateLimitSettings.of(
64+
RateLimitSettings.rejectRateLimitFieldForRequestContext(
9865
map,
99-
DEFAULT_RATE_LIMIT_SETTINGS,
100-
validationException,
66+
ModelConfigurations.SERVICE_SETTINGS,
10167
ElasticInferenceService.NAME,
102-
context
68+
TaskType.TEXT_EMBEDDING,
69+
context,
70+
validationException
10371
);
10472

105-
SimilarityMeasure similarity = extractSimilarity(map, ModelConfigurations.SERVICE_SETTINGS, validationException);
106-
Integer dims = removeAsType(map, DIMENSIONS, Integer.class);
107-
Integer maxInputTokens = removeAsType(map, MAX_INPUT_TOKENS, Integer.class);
108-
10973
if (validationException.validationErrors().isEmpty() == false) {
11074
throw validationException;
11175
}
11276

113-
return new ElasticInferenceServiceDenseTextEmbeddingsServiceSettings(modelId, similarity, dims, maxInputTokens, rateLimitSettings);
77+
return new ElasticInferenceServiceDenseTextEmbeddingsServiceSettings(modelId, similarity, dims, maxInputTokens);
11478
}
11579

11680
public ElasticInferenceServiceDenseTextEmbeddingsServiceSettings(
11781
String modelId,
11882
@Nullable SimilarityMeasure similarity,
11983
@Nullable Integer dimensions,
120-
@Nullable Integer maxInputTokens,
121-
RateLimitSettings rateLimitSettings
84+
@Nullable Integer maxInputTokens
12285
) {
12386
this.modelId = modelId;
12487
this.similarity = similarity;
12588
this.dimensions = dimensions;
12689
this.maxInputTokens = maxInputTokens;
127-
this.rateLimitSettings = Objects.requireNonNullElse(rateLimitSettings, DEFAULT_RATE_LIMIT_SETTINGS);
90+
this.rateLimitSettings = RateLimitSettings.DISABLED_INSTANCE;
12891
}
12992

13093
public ElasticInferenceServiceDenseTextEmbeddingsServiceSettings(StreamInput in) throws IOException {
13194
this.modelId = in.readString();
13295
this.similarity = in.readOptionalEnum(SimilarityMeasure.class);
13396
this.dimensions = in.readOptionalVInt();
13497
this.maxInputTokens = in.readOptionalVInt();
135-
this.rateLimitSettings = new RateLimitSettings(in);
98+
this.rateLimitSettings = RateLimitSettings.DISABLED_INSTANCE;
99+
100+
if (in.getTransportVersion().before(TransportVersions.INFERENCE_API_DISABLE_EIS_RATE_LIMITING)) {
101+
new RateLimitSettings(in);
102+
}
136103
}
137104

138105
@Override
@@ -221,7 +188,9 @@ public void writeTo(StreamOutput out) throws IOException {
221188
out.writeOptionalEnum(SimilarityMeasure.translateSimilarity(similarity, out.getTransportVersion()));
222189
out.writeOptionalVInt(dimensions);
223190
out.writeOptionalVInt(maxInputTokens);
224-
rateLimitSettings.writeTo(out);
191+
if (out.getTransportVersion().before(TransportVersions.INFERENCE_API_DISABLE_EIS_RATE_LIMITING)) {
192+
rateLimitSettings.writeTo(out);
193+
}
225194
}
226195

227196
@Override

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/rerank/ElasticInferenceServiceRerankServiceSettings.java

Lines changed: 21 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
import org.elasticsearch.common.io.stream.StreamOutput;
1515
import org.elasticsearch.inference.ModelConfigurations;
1616
import org.elasticsearch.inference.ServiceSettings;
17+
import org.elasticsearch.inference.TaskType;
1718
import org.elasticsearch.xcontent.XContentBuilder;
1819
import org.elasticsearch.xpack.inference.services.ConfigurationParseContext;
1920
import org.elasticsearch.xpack.inference.services.elastic.ElasticInferenceService;
@@ -35,35 +36,42 @@ public class ElasticInferenceServiceRerankServiceSettings extends FilteredXConte
3536

3637
public static final String NAME = "elastic_rerank_service_settings";
3738

38-
private static final RateLimitSettings DEFAULT_RATE_LIMIT_SETTINGS = new RateLimitSettings(500);
39-
4039
public static ElasticInferenceServiceRerankServiceSettings fromMap(Map<String, Object> map, ConfigurationParseContext context) {
4140
ValidationException validationException = new ValidationException();
4241

4342
String modelId = extractRequiredString(map, MODEL_ID, ModelConfigurations.SERVICE_SETTINGS, validationException);
44-
RateLimitSettings rateLimitSettings = RateLimitSettings.of(
43+
44+
RateLimitSettings.rejectRateLimitFieldForRequestContext(
4545
map,
46-
DEFAULT_RATE_LIMIT_SETTINGS,
47-
validationException,
46+
ModelConfigurations.SERVICE_SETTINGS,
4847
ElasticInferenceService.NAME,
49-
context
48+
TaskType.RERANK,
49+
context,
50+
validationException
5051
);
5152

52-
return new ElasticInferenceServiceRerankServiceSettings(modelId, rateLimitSettings);
53+
if (validationException.validationErrors().isEmpty() == false) {
54+
throw validationException;
55+
}
56+
57+
return new ElasticInferenceServiceRerankServiceSettings(modelId);
5358
}
5459

5560
private final String modelId;
5661

5762
private final RateLimitSettings rateLimitSettings;
5863

59-
public ElasticInferenceServiceRerankServiceSettings(String modelId, RateLimitSettings rateLimitSettings) {
64+
public ElasticInferenceServiceRerankServiceSettings(String modelId) {
6065
this.modelId = Objects.requireNonNull(modelId);
61-
this.rateLimitSettings = Objects.requireNonNullElse(rateLimitSettings, DEFAULT_RATE_LIMIT_SETTINGS);
66+
this.rateLimitSettings = RateLimitSettings.DISABLED_INSTANCE;
6267
}
6368

6469
public ElasticInferenceServiceRerankServiceSettings(StreamInput in) throws IOException {
6570
this.modelId = in.readString();
66-
this.rateLimitSettings = new RateLimitSettings(in);
71+
this.rateLimitSettings = RateLimitSettings.DISABLED_INSTANCE;
72+
if (in.getTransportVersion().before(TransportVersions.INFERENCE_API_DISABLE_EIS_RATE_LIMITING)) {
73+
new RateLimitSettings(in);
74+
}
6775
}
6876

6977
@Override
@@ -115,7 +123,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
115123
@Override
116124
public void writeTo(StreamOutput out) throws IOException {
117125
out.writeString(modelId);
118-
rateLimitSettings.writeTo(out);
126+
if (out.getTransportVersion().before(TransportVersions.INFERENCE_API_DISABLE_EIS_RATE_LIMITING)) {
127+
rateLimitSettings.writeTo(out);
128+
}
119129
}
120130

121131
@Override

0 commit comments

Comments
 (0)