Skip to content

Commit 4d59064

Browse files
authored
[ML] Default inference endpoint for ELSER (#113873)
Adds a default configuration for the ELSER model. The config uses adaptive allocations to scale automatically. The minimum number of allocations is set to 1 for this PR; a follow-up will change that to 0 and enable scale from 0. This endpoint is always visible in the GET API. ``` GET _inference { "endpoints": [ { "inference_id": ".elser-2", "task_type": "sparse_embedding", "service": "elser", "service_settings": { "num_threads": 1, "model_id": ".elser_model_2", "adaptive_allocations": { "enabled": true, "min_number_of_allocations": 1, "max_number_of_allocations": 8 } }, "task_settings": {} } ] } ``` The default configuration can be used without any prior setup. If the model is not downloaded, it is downloaded automatically. If it is not deployed, it is deployed automatically: ``` POST _inference/.elser-2 { "input": "Automagically deploy and infer" } ... { "sparse_embedding": [ { "is_truncated": false, "embedding": { "##fer": 2.2107008, "deployment": 2.1624098, "deploy": 2.144009, "auto": 1.9384763, ``` ### Follow-up tasks - [ ] Add default config for the E5 text embedding model - [ ] Select platform-specific version - [ ] Scale from 0 - [ ] Chunking settings - What happens when the endpoint is deleted? Can it be deleted? - Can the default config be modified (chunking settings, for example)? Probably not.
1 parent 4438e49 commit 4d59064

File tree

29 files changed

+745
-136
lines changed

29 files changed

+745
-136
lines changed

docs/changelog/113873.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 113873
2+
summary: Default inference endpoint for ELSER
3+
area: Machine Learning
4+
type: enhancement
5+
issues: []

docs/reference/rest-api/usage.asciidoc

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,12 @@ GET /_xpack/usage
206206
"inference": {
207207
"available" : true,
208208
"enabled" : true,
209-
"models" : []
209+
"models" : [{
210+
"service": "elasticsearch",
211+
"task_type": "SPARSE_EMBEDDING",
212+
"count": 1
213+
}
214+
]
210215
},
211216
"logstash" : {
212217
"available" : true,

server/src/main/java/org/elasticsearch/inference/InferenceService.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,4 +191,13 @@ default Set<TaskType> supportedStreamingTasks() {
191191
default boolean canStream(TaskType taskType) {
192192
return supportedStreamingTasks().contains(taskType);
193193
}
194+
195+
/**
196+
* A service can define default configurations that can be
197+
* used out of the box without creating an endpoint first.
198+
* @return Default configurations provided by this service
199+
*/
200+
default List<UnparsedModel> defaultConfigs() {
201+
return List.of();
202+
}
194203
}
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.inference;
11+
12+
import java.util.Map;
13+
14+
/**
15+
* Semi parsed model where inference entity id, task type and service
16+
* are known but the settings are not parsed.
17+
*/
18+
public record UnparsedModel(
19+
String inferenceEntityId,
20+
TaskType taskType,
21+
String service,
22+
Map<String, Object> settings,
23+
Map<String, Object> secrets
24+
) {}

test/test-clusters/src/main/java/org/elasticsearch/test/cluster/FeatureFlag.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@ public enum FeatureFlag {
1919
TIME_SERIES_MODE("es.index_mode_feature_flag_registered=true", Version.fromString("8.0.0"), null),
2020
FAILURE_STORE_ENABLED("es.failure_store_feature_flag_enabled=true", Version.fromString("8.12.0"), null),
2121
CHUNKING_SETTINGS_ENABLED("es.inference_chunking_settings_feature_flag_enabled=true", Version.fromString("8.16.0"), null),
22-
INFERENCE_SCALE_TO_ZERO("es.inference_scale_to_zero_feature_flag_enabled=true", Version.fromString("8.16.0"), null);
22+
INFERENCE_SCALE_TO_ZERO("es.inference_scale_to_zero_feature_flag_enabled=true", Version.fromString("8.16.0"), null),
23+
INFERENCE_DEFAULT_ELSER("es.inference_default_elser_feature_flag_enabled=true", Version.fromString("8.16.0"), null);
2324

2425
public final String systemProperty;
2526
public final Version from;

x-pack/plugin/build.gradle

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,5 +83,6 @@ tasks.named("precommit").configure {
8383
tasks.named("yamlRestCompatTestTransform").configure({ task ->
8484
task.skipTest("security/10_forbidden/Test bulk response with invalid credentials", "warning does not exist for compatibility")
8585
task.skipTest("wildcard/30_ignore_above_synthetic_source/wildcard field type ignore_above", "Temporary until backported")
86+
task.skipTest("inference/inference_crud/Test get all", "Assertions on number of inference models break due to default configs")
8687
})
8788

x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/StartTrainedModelDeploymentAction.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -237,7 +237,9 @@ public int computeNumberOfAllocations() {
237237
if (numberOfAllocations != null) {
238238
return numberOfAllocations;
239239
} else {
240-
if (adaptiveAllocationsSettings == null || adaptiveAllocationsSettings.getMinNumberOfAllocations() == null) {
240+
if (adaptiveAllocationsSettings == null
241+
|| adaptiveAllocationsSettings.getMinNumberOfAllocations() == null
242+
|| adaptiveAllocationsSettings.getMinNumberOfAllocations() == 0) {
241243
return DEFAULT_NUM_ALLOCATIONS;
242244
} else {
243245
return adaptiveAllocationsSettings.getMinNumberOfAllocations();

x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/inference/assignment/AdaptiveAllocationsSettings.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -147,8 +147,8 @@ public AdaptiveAllocationsSettings merge(AdaptiveAllocationsSettings updates) {
147147
public ActionRequestValidationException validate() {
148148
ActionRequestValidationException validationException = new ActionRequestValidationException();
149149
boolean hasMinNumberOfAllocations = (minNumberOfAllocations != null && minNumberOfAllocations != -1);
150-
if (hasMinNumberOfAllocations && minNumberOfAllocations < 1) {
151-
validationException.addValidationError("[" + MIN_NUMBER_OF_ALLOCATIONS + "] must be a positive integer or null");
150+
if (hasMinNumberOfAllocations && minNumberOfAllocations < 0) {
151+
validationException.addValidationError("[" + MIN_NUMBER_OF_ALLOCATIONS + "] must be a non-negative integer or null");
152152
}
153153
boolean hasMaxNumberOfAllocations = (maxNumberOfAllocations != null && maxNumberOfAllocations != -1);
154154
if (hasMaxNumberOfAllocations && maxNumberOfAllocations < 1) {

x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/inference/assignment/AdaptiveAllocationSettingsTests.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ public class AdaptiveAllocationSettingsTests extends AbstractWireSerializingTest
1717
public static AdaptiveAllocationsSettings testInstance() {
1818
return new AdaptiveAllocationsSettings(
1919
randomBoolean() ? null : randomBoolean(),
20-
randomBoolean() ? null : randomIntBetween(1, 2),
20+
randomBoolean() ? null : randomIntBetween(0, 2),
2121
randomBoolean() ? null : randomIntBetween(2, 4)
2222
);
2323
}

x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/CustomElandModelIT.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ public void testSparse() throws IOException {
8585

8686
var inferenceId = "sparse-inf";
8787
putModel(inferenceId, inferenceConfig, TaskType.SPARSE_EMBEDDING);
88-
var results = inferOnMockService(inferenceId, List.of("washing", "machine"));
88+
var results = infer(inferenceId, List.of("washing", "machine"));
8989
deleteModel(inferenceId);
9090
assertNotNull(results.get("sparse_embedding"));
9191
}

0 commit comments

Comments
 (0)