Skip to content

Commit 32517b6

Browse files
committed
[ML] Default endpoint allocations are now configurable
The min and max allocations used in the default endpoints' adaptive allocation settings are now configurable via the `xpack.ml.models.default_allocations.min` and `xpack.ml.models.default_allocations.max` settings. This is intended to help new clusters running on laptops and in serverless. Changing these settings does not automatically scale the default endpoints up or down to the new values - a cluster reboot is still needed for the values to be applied. Relates to #124653
1 parent b78ac7c commit 32517b6

File tree

5 files changed

+42
-9
lines changed

5 files changed

+42
-9
lines changed

x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/inference/assignment/AdaptiveAllocationsSettings.java

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import org.elasticsearch.common.io.stream.StreamInput;
1212
import org.elasticsearch.common.io.stream.StreamOutput;
1313
import org.elasticsearch.common.io.stream.Writeable;
14+
import org.elasticsearch.common.settings.Setting;
1415
import org.elasticsearch.xcontent.ObjectParser;
1516
import org.elasticsearch.xcontent.ParseField;
1617
import org.elasticsearch.xcontent.ToXContentObject;
@@ -22,6 +23,24 @@
2223

2324
public class AdaptiveAllocationsSettings implements ToXContentObject, Writeable {
2425

26+
public static final Setting<Integer> DEFAULT_MIN_ALLOCATIONS = Setting.intSetting(
27+
"xpack.ml.models.default_allocations.min",
28+
0,
29+
0,
30+
32,
31+
Setting.Property.Dynamic,
32+
Setting.Property.NodeScope
33+
);
34+
35+
public static final Setting<Integer> DEFAULT_MAX_ALLOCATIONS = Setting.intSetting(
36+
"xpack.ml.models.default_allocations.max",
37+
32,
38+
1,
39+
32,
40+
Setting.Property.Dynamic,
41+
Setting.Property.NodeScope
42+
);
43+
2544
public static final AdaptiveAllocationsSettings RESET_PLACEHOLDER = new AdaptiveAllocationsSettings(false, -1, -1);
2645

2746
public static final ParseField ENABLED = new ParseField("enabled");

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticRerankerServiceSettings.java

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,19 +9,27 @@
99

1010
import org.elasticsearch.common.ValidationException;
1111
import org.elasticsearch.common.io.stream.StreamInput;
12+
import org.elasticsearch.common.settings.Settings;
1213
import org.elasticsearch.xpack.core.ml.inference.assignment.AdaptiveAllocationsSettings;
1314

1415
import java.io.IOException;
1516
import java.util.Map;
1617

18+
import static org.elasticsearch.xpack.core.ml.inference.assignment.AdaptiveAllocationsSettings.DEFAULT_MAX_ALLOCATIONS;
19+
import static org.elasticsearch.xpack.core.ml.inference.assignment.AdaptiveAllocationsSettings.DEFAULT_MIN_ALLOCATIONS;
1720
import static org.elasticsearch.xpack.inference.services.elasticsearch.ElasticsearchInternalService.RERANKER_ID;
1821

1922
public class ElasticRerankerServiceSettings extends ElasticsearchInternalServiceSettings {
2023

2124
public static final String NAME = "elastic_reranker_service_settings";
2225

23-
public static ElasticRerankerServiceSettings defaultEndpointSettings() {
24-
return new ElasticRerankerServiceSettings(null, 1, RERANKER_ID, new AdaptiveAllocationsSettings(Boolean.TRUE, 0, 32));
26+
public static ElasticRerankerServiceSettings defaultEndpointSettings(Settings settings) {
27+
return new ElasticRerankerServiceSettings(
28+
null,
29+
1,
30+
RERANKER_ID,
31+
new AdaptiveAllocationsSettings(Boolean.TRUE, DEFAULT_MIN_ALLOCATIONS.get(settings), DEFAULT_MAX_ALLOCATIONS.get(settings))
32+
);
2533
}
2634

2735
public ElasticRerankerServiceSettings(ElasticsearchInternalServiceSettings other) {

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalService.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -923,22 +923,22 @@ private List<Model> defaultConfigs(boolean useLinuxOptimizedModel) {
923923
DEFAULT_ELSER_ID,
924924
TaskType.SPARSE_EMBEDDING,
925925
NAME,
926-
ElserInternalServiceSettings.defaultEndpointSettings(useLinuxOptimizedModel),
926+
ElserInternalServiceSettings.defaultEndpointSettings(useLinuxOptimizedModel, settings),
927927
ElserMlNodeTaskSettings.DEFAULT,
928928
ChunkingSettingsBuilder.DEFAULT_SETTINGS
929929
);
930930
var defaultE5 = new MultilingualE5SmallModel(
931931
DEFAULT_E5_ID,
932932
TaskType.TEXT_EMBEDDING,
933933
NAME,
934-
MultilingualE5SmallInternalServiceSettings.defaultEndpointSettings(useLinuxOptimizedModel),
934+
MultilingualE5SmallInternalServiceSettings.defaultEndpointSettings(useLinuxOptimizedModel, settings),
935935
ChunkingSettingsBuilder.DEFAULT_SETTINGS
936936
);
937937
var defaultRerank = new ElasticRerankerModel(
938938
DEFAULT_RERANK_ID,
939939
TaskType.RERANK,
940940
NAME,
941-
ElasticRerankerServiceSettings.defaultEndpointSettings(),
941+
ElasticRerankerServiceSettings.defaultEndpointSettings(settings),
942942
RerankTaskSettings.DEFAULT_SETTINGS
943943
);
944944
return List.of(defaultElser, defaultE5, defaultRerank);

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElserInternalServiceSettings.java

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,14 @@
1010
import org.elasticsearch.TransportVersion;
1111
import org.elasticsearch.TransportVersions;
1212
import org.elasticsearch.common.io.stream.StreamInput;
13+
import org.elasticsearch.common.settings.Settings;
1314
import org.elasticsearch.inference.MinimalServiceSettings;
1415
import org.elasticsearch.xpack.core.ml.inference.assignment.AdaptiveAllocationsSettings;
1516

1617
import java.io.IOException;
1718

19+
import static org.elasticsearch.xpack.core.ml.inference.assignment.AdaptiveAllocationsSettings.DEFAULT_MAX_ALLOCATIONS;
20+
import static org.elasticsearch.xpack.core.ml.inference.assignment.AdaptiveAllocationsSettings.DEFAULT_MIN_ALLOCATIONS;
1821
import static org.elasticsearch.xpack.inference.services.elasticsearch.ElserModels.ELSER_V2_MODEL;
1922
import static org.elasticsearch.xpack.inference.services.elasticsearch.ElserModels.ELSER_V2_MODEL_LINUX_X86;
2023

@@ -26,12 +29,12 @@ public static MinimalServiceSettings minimalServiceSettings() {
2629
return MinimalServiceSettings.sparseEmbedding(ElasticsearchInternalService.NAME);
2730
}
2831

29-
public static ElserInternalServiceSettings defaultEndpointSettings(boolean useLinuxOptimizedModel) {
32+
public static ElserInternalServiceSettings defaultEndpointSettings(boolean useLinuxOptimizedModel, Settings settings) {
3033
return new ElserInternalServiceSettings(
3134
null,
3235
1,
3336
useLinuxOptimizedModel ? ELSER_V2_MODEL_LINUX_X86 : ELSER_V2_MODEL,
34-
new AdaptiveAllocationsSettings(Boolean.TRUE, 0, 32)
37+
new AdaptiveAllocationsSettings(Boolean.TRUE, DEFAULT_MIN_ALLOCATIONS.get(settings), DEFAULT_MAX_ALLOCATIONS.get(settings))
3538
);
3639
}
3740

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/MultilingualE5SmallInternalServiceSettings.java

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
import org.elasticsearch.common.ValidationException;
1111
import org.elasticsearch.common.io.stream.StreamInput;
12+
import org.elasticsearch.common.settings.Settings;
1213
import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper;
1314
import org.elasticsearch.inference.MinimalServiceSettings;
1415
import org.elasticsearch.inference.SimilarityMeasure;
@@ -18,6 +19,8 @@
1819
import java.util.Arrays;
1920
import java.util.Map;
2021

22+
import static org.elasticsearch.xpack.core.ml.inference.assignment.AdaptiveAllocationsSettings.DEFAULT_MAX_ALLOCATIONS;
23+
import static org.elasticsearch.xpack.core.ml.inference.assignment.AdaptiveAllocationsSettings.DEFAULT_MIN_ALLOCATIONS;
2124
import static org.elasticsearch.xpack.inference.services.elasticsearch.ElasticsearchInternalService.MULTILINGUAL_E5_SMALL_MODEL_ID;
2225
import static org.elasticsearch.xpack.inference.services.elasticsearch.ElasticsearchInternalService.MULTILINGUAL_E5_SMALL_MODEL_ID_LINUX_X86;
2326

@@ -37,12 +40,12 @@ public static MinimalServiceSettings minimalServiceSettings() {
3740
);
3841
}
3942

40-
public static MultilingualE5SmallInternalServiceSettings defaultEndpointSettings(boolean useLinuxOptimizedModel) {
43+
public static MultilingualE5SmallInternalServiceSettings defaultEndpointSettings(boolean useLinuxOptimizedModel, Settings settings) {
4144
return new MultilingualE5SmallInternalServiceSettings(
4245
null,
4346
1,
4447
useLinuxOptimizedModel ? MULTILINGUAL_E5_SMALL_MODEL_ID_LINUX_X86 : MULTILINGUAL_E5_SMALL_MODEL_ID,
45-
new AdaptiveAllocationsSettings(Boolean.TRUE, 0, 32)
48+
new AdaptiveAllocationsSettings(Boolean.TRUE, DEFAULT_MIN_ALLOCATIONS.get(settings), DEFAULT_MAX_ALLOCATIONS.get(settings))
4649
);
4750
}
4851

0 commit comments

Comments
 (0)