Skip to content
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/127783.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 127783
summary: Default endpoint allocations are now configurable
area: Machine Learning
type: enhancement
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.settings.Setting;
import org.elasticsearch.xcontent.ObjectParser;
import org.elasticsearch.xcontent.ParseField;
import org.elasticsearch.xcontent.ToXContentObject;
Expand All @@ -22,6 +23,24 @@

public class AdaptiveAllocationsSettings implements ToXContentObject, Writeable {

public static final Setting<Integer> DEFAULT_MIN_ALLOCATIONS = Setting.intSetting(
"xpack.ml.models.default_allocations.min",
0,
0,
32,
Setting.Property.Dynamic,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think dynamic will only be useful if we're going to allow changes without having to reboot. I think we typically add an addSettingsUpdateConsumer call so we can listen for the changes, for example:

https://github.com/elastic/elasticsearch/blob/main/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/HttpSettings.java#L48

Setting.Property.NodeScope
);

/**
 * Integer setting registered under the key {@code xpack.ml.models.default_allocations.max}.
 * <p>
 * Declared with the {@code Dynamic} and {@code NodeScope} properties. The three numeric
 * arguments are presumably the default (32), the minimum (1) and the maximum (32), following
 * the {@code Setting.intSetting(key, default, min, max, properties...)} overload; confirm
 * against the {@code Setting} API.
 * <p>
 * NOTE(review): nothing in this declaration ties the value to the companion
 * {@code xpack.ml.models.default_allocations.min} setting, so a configured min greater than
 * max is not rejected here; verify whether that is validated elsewhere.
 */
public static final Setting<Integer> DEFAULT_MAX_ALLOCATIONS = Setting.intSetting(
"xpack.ml.models.default_allocations.max",
32,
1,
32,
Setting.Property.Dynamic,
Setting.Property.NodeScope
);

public static final AdaptiveAllocationsSettings RESET_PLACEHOLDER = new AdaptiveAllocationsSettings(false, -1, -1);

public static final ParseField ENABLED = new ParseField("enabled");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,19 +9,27 @@

import org.elasticsearch.common.ValidationException;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.xpack.core.ml.inference.assignment.AdaptiveAllocationsSettings;

import java.io.IOException;
import java.util.Map;

import static org.elasticsearch.xpack.core.ml.inference.assignment.AdaptiveAllocationsSettings.DEFAULT_MAX_ALLOCATIONS;
import static org.elasticsearch.xpack.core.ml.inference.assignment.AdaptiveAllocationsSettings.DEFAULT_MIN_ALLOCATIONS;
import static org.elasticsearch.xpack.inference.services.elasticsearch.ElasticsearchInternalService.RERANKER_ID;

public class ElasticRerankerServiceSettings extends ElasticsearchInternalServiceSettings {

public static final String NAME = "elastic_reranker_service_settings";

public static ElasticRerankerServiceSettings defaultEndpointSettings() {
return new ElasticRerankerServiceSettings(null, 1, RERANKER_ID, new AdaptiveAllocationsSettings(Boolean.TRUE, 0, 32));
/**
 * Creates the service settings for the default reranker endpoint.
 * <p>
 * The returned settings reference {@code RERANKER_ID} and carry an
 * {@link AdaptiveAllocationsSettings} built with {@code Boolean.TRUE} and the values that
 * {@code DEFAULT_MIN_ALLOCATIONS} and {@code DEFAULT_MAX_ALLOCATIONS} resolve from the given
 * settings, in that order.
 *
 * @param settings the settings from which the two allocation values are read
 * @return a newly constructed {@code ElasticRerankerServiceSettings}
 */
public static ElasticRerankerServiceSettings defaultEndpointSettings(Settings settings) {
return new ElasticRerankerServiceSettings(
// NOTE(review): presumably the fixed allocation count, left unset; confirm against the constructor
null,
// NOTE(review): presumably the thread count; confirm against the constructor
1,
RERANKER_ID,
// Values are read from the supplied settings each time this method is called.
new AdaptiveAllocationsSettings(Boolean.TRUE, DEFAULT_MIN_ALLOCATIONS.get(settings), DEFAULT_MAX_ALLOCATIONS.get(settings))
);
}

public ElasticRerankerServiceSettings(ElasticsearchInternalServiceSettings other) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -923,22 +923,22 @@ private List<Model> defaultConfigs(boolean useLinuxOptimizedModel) {
DEFAULT_ELSER_ID,
TaskType.SPARSE_EMBEDDING,
NAME,
ElserInternalServiceSettings.defaultEndpointSettings(useLinuxOptimizedModel),
ElserInternalServiceSettings.defaultEndpointSettings(useLinuxOptimizedModel, settings),
ElserMlNodeTaskSettings.DEFAULT,
ChunkingSettingsBuilder.DEFAULT_SETTINGS
);
var defaultE5 = new MultilingualE5SmallModel(
DEFAULT_E5_ID,
TaskType.TEXT_EMBEDDING,
NAME,
MultilingualE5SmallInternalServiceSettings.defaultEndpointSettings(useLinuxOptimizedModel),
MultilingualE5SmallInternalServiceSettings.defaultEndpointSettings(useLinuxOptimizedModel, settings),
ChunkingSettingsBuilder.DEFAULT_SETTINGS
);
var defaultRerank = new ElasticRerankerModel(
DEFAULT_RERANK_ID,
TaskType.RERANK,
NAME,
ElasticRerankerServiceSettings.defaultEndpointSettings(),
ElasticRerankerServiceSettings.defaultEndpointSettings(settings),
RerankTaskSettings.DEFAULT_SETTINGS
);
return List.of(defaultElser, defaultE5, defaultRerank);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,14 @@
import org.elasticsearch.TransportVersion;
import org.elasticsearch.TransportVersions;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.inference.MinimalServiceSettings;
import org.elasticsearch.xpack.core.ml.inference.assignment.AdaptiveAllocationsSettings;

import java.io.IOException;

import static org.elasticsearch.xpack.core.ml.inference.assignment.AdaptiveAllocationsSettings.DEFAULT_MAX_ALLOCATIONS;
import static org.elasticsearch.xpack.core.ml.inference.assignment.AdaptiveAllocationsSettings.DEFAULT_MIN_ALLOCATIONS;
import static org.elasticsearch.xpack.inference.services.elasticsearch.ElserModels.ELSER_V2_MODEL;
import static org.elasticsearch.xpack.inference.services.elasticsearch.ElserModels.ELSER_V2_MODEL_LINUX_X86;

Expand All @@ -26,12 +29,12 @@ public static MinimalServiceSettings minimalServiceSettings() {
return MinimalServiceSettings.sparseEmbedding(ElasticsearchInternalService.NAME);
}

public static ElserInternalServiceSettings defaultEndpointSettings(boolean useLinuxOptimizedModel) {
public static ElserInternalServiceSettings defaultEndpointSettings(boolean useLinuxOptimizedModel, Settings settings) {
return new ElserInternalServiceSettings(
null,
1,
useLinuxOptimizedModel ? ELSER_V2_MODEL_LINUX_X86 : ELSER_V2_MODEL,
new AdaptiveAllocationsSettings(Boolean.TRUE, 0, 32)
new AdaptiveAllocationsSettings(Boolean.TRUE, DEFAULT_MIN_ALLOCATIONS.get(settings), DEFAULT_MAX_ALLOCATIONS.get(settings))
);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

import org.elasticsearch.common.ValidationException;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper;
import org.elasticsearch.inference.MinimalServiceSettings;
import org.elasticsearch.inference.SimilarityMeasure;
Expand All @@ -18,6 +19,8 @@
import java.util.Arrays;
import java.util.Map;

import static org.elasticsearch.xpack.core.ml.inference.assignment.AdaptiveAllocationsSettings.DEFAULT_MAX_ALLOCATIONS;
import static org.elasticsearch.xpack.core.ml.inference.assignment.AdaptiveAllocationsSettings.DEFAULT_MIN_ALLOCATIONS;
import static org.elasticsearch.xpack.inference.services.elasticsearch.ElasticsearchInternalService.MULTILINGUAL_E5_SMALL_MODEL_ID;
import static org.elasticsearch.xpack.inference.services.elasticsearch.ElasticsearchInternalService.MULTILINGUAL_E5_SMALL_MODEL_ID_LINUX_X86;

Expand All @@ -37,12 +40,12 @@ public static MinimalServiceSettings minimalServiceSettings() {
);
}

public static MultilingualE5SmallInternalServiceSettings defaultEndpointSettings(boolean useLinuxOptimizedModel) {
public static MultilingualE5SmallInternalServiceSettings defaultEndpointSettings(boolean useLinuxOptimizedModel, Settings settings) {
return new MultilingualE5SmallInternalServiceSettings(
null,
1,
useLinuxOptimizedModel ? MULTILINGUAL_E5_SMALL_MODEL_ID_LINUX_X86 : MULTILINGUAL_E5_SMALL_MODEL_ID,
new AdaptiveAllocationsSettings(Boolean.TRUE, 0, 32)
new AdaptiveAllocationsSettings(Boolean.TRUE, DEFAULT_MIN_ALLOCATIONS.get(settings), DEFAULT_MAX_ALLOCATIONS.get(settings))
);
}

Expand Down
Loading