diff --git a/docs/changelog/127783.yaml b/docs/changelog/127783.yaml new file mode 100644 index 0000000000000..beecb2dcfa917 --- /dev/null +++ b/docs/changelog/127783.yaml @@ -0,0 +1,5 @@ +pr: 127783 +summary: Default endpoint allocations are now configurable +area: Machine Learning +type: enhancement +issues: [] diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/inference/assignment/AdaptiveAllocationsSettings.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/inference/assignment/AdaptiveAllocationsSettings.java index d4eace8e96621..e8a840aaaffea 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/inference/assignment/AdaptiveAllocationsSettings.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/inference/assignment/AdaptiveAllocationsSettings.java @@ -11,6 +11,7 @@ import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.common.settings.Setting; import org.elasticsearch.xcontent.ObjectParser; import org.elasticsearch.xcontent.ParseField; import org.elasticsearch.xcontent.ToXContentObject; @@ -22,6 +23,24 @@ public class AdaptiveAllocationsSettings implements ToXContentObject, Writeable { + public static final Setting DEFAULT_MIN_ALLOCATIONS = Setting.intSetting( + "xpack.ml.models.default_allocations.min", + 0, + 0, + 32, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + + public static final Setting DEFAULT_MAX_ALLOCATIONS = Setting.intSetting( + "xpack.ml.models.default_allocations.max", + 32, + 1, + 32, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + public static final AdaptiveAllocationsSettings RESET_PLACEHOLDER = new AdaptiveAllocationsSettings(false, -1, -1); public static final ParseField ENABLED = new ParseField("enabled"); diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticRerankerServiceSettings.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticRerankerServiceSettings.java index 2b7904e615682..9660536a7a913 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticRerankerServiceSettings.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticRerankerServiceSettings.java @@ -9,19 +9,27 @@ import org.elasticsearch.common.ValidationException; import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.settings.Settings; import org.elasticsearch.xpack.core.ml.inference.assignment.AdaptiveAllocationsSettings; import java.io.IOException; import java.util.Map; +import static org.elasticsearch.xpack.core.ml.inference.assignment.AdaptiveAllocationsSettings.DEFAULT_MAX_ALLOCATIONS; +import static org.elasticsearch.xpack.core.ml.inference.assignment.AdaptiveAllocationsSettings.DEFAULT_MIN_ALLOCATIONS; import static org.elasticsearch.xpack.inference.services.elasticsearch.ElasticsearchInternalService.RERANKER_ID; public class ElasticRerankerServiceSettings extends ElasticsearchInternalServiceSettings { public static final String NAME = "elastic_reranker_service_settings"; - public static ElasticRerankerServiceSettings defaultEndpointSettings() { - return new ElasticRerankerServiceSettings(null, 1, RERANKER_ID, new AdaptiveAllocationsSettings(Boolean.TRUE, 0, 32)); + public static ElasticRerankerServiceSettings defaultEndpointSettings(Settings settings) { + return new ElasticRerankerServiceSettings( + null, + 1, + RERANKER_ID, + new AdaptiveAllocationsSettings(Boolean.TRUE, DEFAULT_MIN_ALLOCATIONS.get(settings), DEFAULT_MAX_ALLOCATIONS.get(settings)) + ); } public ElasticRerankerServiceSettings(ElasticsearchInternalServiceSettings other) { diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalService.java index 7435430f8af43..6ba4ad82006f0 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalService.java @@ -923,7 +923,7 @@ private List defaultConfigs(boolean useLinuxOptimizedModel) { DEFAULT_ELSER_ID, TaskType.SPARSE_EMBEDDING, NAME, - ElserInternalServiceSettings.defaultEndpointSettings(useLinuxOptimizedModel), + ElserInternalServiceSettings.defaultEndpointSettings(useLinuxOptimizedModel, settings), ElserMlNodeTaskSettings.DEFAULT, ChunkingSettingsBuilder.DEFAULT_SETTINGS ); @@ -931,14 +931,14 @@ private List defaultConfigs(boolean useLinuxOptimizedModel) { DEFAULT_E5_ID, TaskType.TEXT_EMBEDDING, NAME, - MultilingualE5SmallInternalServiceSettings.defaultEndpointSettings(useLinuxOptimizedModel), + MultilingualE5SmallInternalServiceSettings.defaultEndpointSettings(useLinuxOptimizedModel, settings), ChunkingSettingsBuilder.DEFAULT_SETTINGS ); var defaultRerank = new ElasticRerankerModel( DEFAULT_RERANK_ID, TaskType.RERANK, NAME, - ElasticRerankerServiceSettings.defaultEndpointSettings(), + ElasticRerankerServiceSettings.defaultEndpointSettings(settings), RerankTaskSettings.DEFAULT_SETTINGS ); return List.of(defaultElser, defaultE5, defaultRerank); diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElserInternalServiceSettings.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElserInternalServiceSettings.java index d55a3d24632e4..8e7171fe4b70e 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElserInternalServiceSettings.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElserInternalServiceSettings.java @@ -10,11 +10,14 @@ import org.elasticsearch.TransportVersion; import org.elasticsearch.TransportVersions; import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.settings.Settings; import org.elasticsearch.inference.MinimalServiceSettings; import org.elasticsearch.xpack.core.ml.inference.assignment.AdaptiveAllocationsSettings; import java.io.IOException; +import static org.elasticsearch.xpack.core.ml.inference.assignment.AdaptiveAllocationsSettings.DEFAULT_MAX_ALLOCATIONS; +import static org.elasticsearch.xpack.core.ml.inference.assignment.AdaptiveAllocationsSettings.DEFAULT_MIN_ALLOCATIONS; import static org.elasticsearch.xpack.inference.services.elasticsearch.ElserModels.ELSER_V2_MODEL; import static org.elasticsearch.xpack.inference.services.elasticsearch.ElserModels.ELSER_V2_MODEL_LINUX_X86; @@ -26,12 +29,12 @@ public static MinimalServiceSettings minimalServiceSettings() { return MinimalServiceSettings.sparseEmbedding(ElasticsearchInternalService.NAME); } - public static ElserInternalServiceSettings defaultEndpointSettings(boolean useLinuxOptimizedModel) { + public static ElserInternalServiceSettings defaultEndpointSettings(boolean useLinuxOptimizedModel, Settings settings) { return new ElserInternalServiceSettings( null, 1, useLinuxOptimizedModel ? ELSER_V2_MODEL_LINUX_X86 : ELSER_V2_MODEL, - new AdaptiveAllocationsSettings(Boolean.TRUE, 0, 32) + new AdaptiveAllocationsSettings(Boolean.TRUE, DEFAULT_MIN_ALLOCATIONS.get(settings), DEFAULT_MAX_ALLOCATIONS.get(settings)) ); } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/MultilingualE5SmallInternalServiceSettings.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/MultilingualE5SmallInternalServiceSettings.java index 5d5875670947d..c0ac8daf4e5cc 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/MultilingualE5SmallInternalServiceSettings.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/MultilingualE5SmallInternalServiceSettings.java @@ -9,6 +9,7 @@ import org.elasticsearch.common.ValidationException; import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; import org.elasticsearch.inference.MinimalServiceSettings; import org.elasticsearch.inference.SimilarityMeasure; @@ -18,6 +19,8 @@ import java.util.Arrays; import java.util.Map; +import static org.elasticsearch.xpack.core.ml.inference.assignment.AdaptiveAllocationsSettings.DEFAULT_MAX_ALLOCATIONS; +import static org.elasticsearch.xpack.core.ml.inference.assignment.AdaptiveAllocationsSettings.DEFAULT_MIN_ALLOCATIONS; import static org.elasticsearch.xpack.inference.services.elasticsearch.ElasticsearchInternalService.MULTILINGUAL_E5_SMALL_MODEL_ID; import static org.elasticsearch.xpack.inference.services.elasticsearch.ElasticsearchInternalService.MULTILINGUAL_E5_SMALL_MODEL_ID_LINUX_X86; @@ -37,12 +40,12 @@ public static MinimalServiceSettings minimalServiceSettings() { ); } - public static MultilingualE5SmallInternalServiceSettings defaultEndpointSettings(boolean useLinuxOptimizedModel) { + public static MultilingualE5SmallInternalServiceSettings defaultEndpointSettings(boolean useLinuxOptimizedModel, Settings settings) { return new MultilingualE5SmallInternalServiceSettings( null, 1, useLinuxOptimizedModel ? MULTILINGUAL_E5_SMALL_MODEL_ID_LINUX_X86 : MULTILINGUAL_E5_SMALL_MODEL_ID, - new AdaptiveAllocationsSettings(Boolean.TRUE, 0, 32) + new AdaptiveAllocationsSettings(Boolean.TRUE, DEFAULT_MIN_ALLOCATIONS.get(settings), DEFAULT_MAX_ALLOCATIONS.get(settings)) ); }