From 7d658e4d75cc10c5bbc9da2f23e040ec64e07497 Mon Sep 17 00:00:00 2001 From: David Kyle Date: Mon, 7 Oct 2024 14:57:51 +0100 Subject: [PATCH 1/6] Default E5 endpoint # Conflicts: # x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalServiceTests.java --- ...ltElserIT.java => DefaultEndPointsIT.java} | 41 +++++++++++++++++-- .../BaseElasticsearchInternalService.java | 2 +- .../ElasticsearchInternalService.java | 32 ++++++++++++++- .../ElasticsearchInternalServiceTests.java | 7 ++++ ...portStartTrainedModelDeploymentAction.java | 4 +- 5 files changed, 78 insertions(+), 8 deletions(-) rename x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/{DefaultElserIT.java => DefaultEndPointsIT.java} (57%) diff --git a/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/DefaultElserIT.java b/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/DefaultEndPointsIT.java similarity index 57% rename from x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/DefaultElserIT.java rename to x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/DefaultEndPointsIT.java index 5d84aad4b7344..083bad2c91613 100644 --- a/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/DefaultElserIT.java +++ b/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/DefaultEndPointsIT.java @@ -22,13 +22,13 @@ import static org.hamcrest.Matchers.is; import static org.hamcrest.Matchers.oneOf; -public class DefaultElserIT extends InferenceBaseRestTest { +public class DefaultEndPointsIT extends InferenceBaseRestTest { private TestThreadPool threadPool; @Before public void 
createThreadPool() { - threadPool = new TestThreadPool(DefaultElserIT.class.getSimpleName()); + threadPool = new TestThreadPool(DefaultEndPointsIT.class.getSimpleName()); } @After @@ -38,7 +38,7 @@ public void tearDown() throws Exception { } @SuppressWarnings("unchecked") - public void testInferCreatesDefaultElser() throws IOException { + public void testInferDeploysDefaultElser() throws IOException { assumeTrue("Default config requires a feature flag", DefaultElserFeatureFlag.isEnabled()); var model = getModel(ElasticsearchInternalService.DEFAULT_ELSER_ID); assertDefaultElserConfig(model); @@ -67,4 +67,39 @@ private static void assertDefaultElserConfig(Map modelConfig) { Matchers.is(Map.of("enabled", true, "min_number_of_allocations", 1, "max_number_of_allocations", 8)) ); } + + @SuppressWarnings("unchecked") + public void testInferDeploysDefaultE5() throws IOException { + assumeTrue("Default config requires a feature flag", DefaultElserFeatureFlag.isEnabled()); + var model = getModel(ElasticsearchInternalService.DEFAULT_E5_ID); + assertDefaultE5Config(model); + + var inputs = List.of("Hello World", "Goodnight moon"); + var queryParams = Map.of("timeout", "120s"); + var results = infer(ElasticsearchInternalService.DEFAULT_E5_ID, TaskType.TEXT_EMBEDDING, inputs, queryParams); + var embeddings = (List>) results.get("text_embedding"); + assertThat(results.toString(), embeddings, hasSize(2)); + } + + @SuppressWarnings("unchecked") + private static void assertDefaultE5Config(Map modelConfig) { + assertEquals(modelConfig.toString(), ElasticsearchInternalService.DEFAULT_E5_ID, modelConfig.get("inference_id")); + assertEquals(modelConfig.toString(), ElasticsearchInternalService.NAME, modelConfig.get("service")); + assertEquals(modelConfig.toString(), TaskType.TEXT_EMBEDDING.toString(), modelConfig.get("task_type")); + + var serviceSettings = (Map) modelConfig.get("service_settings"); + assertThat( + modelConfig.toString(), + serviceSettings.get("model_id"), + 
is(oneOf(".multilingual-e5-small", ".multilingual-e5-small_linux-x86_64")) + ); + assertEquals(modelConfig.toString(), 1, serviceSettings.get("num_threads")); + + var adaptiveAllocations = (Map) serviceSettings.get("adaptive_allocations"); + assertThat( + modelConfig.toString(), + adaptiveAllocations, + Matchers.is(Map.of("enabled", true, "min_number_of_allocations", 1, "max_number_of_allocations", 8)) + ); + } } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/BaseElasticsearchInternalService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/BaseElasticsearchInternalService.java index 881e2e82b766a..c866bb60c837f 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/BaseElasticsearchInternalService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/BaseElasticsearchInternalService.java @@ -259,7 +259,7 @@ public static InferModelAction.Request buildInferenceRequest( return request; } - protected abstract boolean isDefaultId(String inferenceId); + abstract boolean isDefaultId(String inferenceId); protected void maybeStartDeployment( ElasticsearchInternalModel model, diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalService.java index 9a4201842873e..d8ba352225ee1 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalService.java @@ -78,6 +78,7 @@ public class ElasticsearchInternalService extends BaseElasticsearchInternalServi public 
static final int EMBEDDING_MAX_BATCH_SIZE = 10; public static final String DEFAULT_ELSER_ID = ".elser-2"; + public static final String DEFAULT_E5_ID = ".default-multilingual-e5-small"; // TODO what to name this private static final Logger logger = LogManager.getLogger(ElasticsearchInternalService.class); private static final DeprecationLogger DEPRECATION_LOGGER = DeprecationLogger.getLogger(ElasticsearchInternalService.class); @@ -815,6 +816,26 @@ public List defaultConfigs() { ) ); + // TODO Chunking settings + Map e5Settings = Map.of( + ModelConfigurations.SERVICE_SETTINGS, + Map.of( + ElasticsearchInternalServiceSettings.MODEL_ID, + MULTILINGUAL_E5_SMALL_MODEL_ID, // TODO pick model depending on platform + ElasticsearchInternalServiceSettings.NUM_THREADS, + 1, + ElasticsearchInternalServiceSettings.ADAPTIVE_ALLOCATIONS, + Map.of( + "enabled", + Boolean.TRUE, + "min_number_of_allocations", + 1, + "max_number_of_allocations", + 8 // no max? + ) + ) + ); + return List.of( new UnparsedModel( DEFAULT_ELSER_ID, @@ -822,13 +843,20 @@ public List defaultConfigs() { NAME, elserSettings, Map.of() // no secrets + ), + new UnparsedModel( + DEFAULT_E5_ID, + TaskType.TEXT_EMBEDDING, + NAME, + e5Settings, + Map.of() // no secrets ) ); } @Override - protected boolean isDefaultId(String inferenceId) { - return DEFAULT_ELSER_ID.equals(inferenceId); + boolean isDefaultId(String inferenceId) { + return DEFAULT_ELSER_ID.equals(inferenceId) || DEFAULT_E5_ID.equals(inferenceId); } static EmbeddingRequestChunker.EmbeddingType embeddingTypeFromTaskTypeAndSettings( diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalServiceTests.java index 61645613b8722..f891b6018aace 100644 --- 
a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalServiceTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalServiceTests.java @@ -1541,6 +1541,13 @@ public void testEmbeddingTypeFromTaskTypeAndSettings() { assertThat(e.getMessage(), containsString("Chunking is not supported for task type [completion]")); } + public void testIsDefaultId() { + var service = createService(mock(Client.class)); + assertTrue(service.isDefaultId(".elser-2")); + assertTrue(service.isDefaultId(".default-multilingual-e5-small")); // TODO name? + assertFalse(service.isDefaultId("foo")); + } + private ElasticsearchInternalService createService(Client client) { var context = new InferenceServiceExtension.InferenceServiceFactoryContext(client, threadPool); return new ElasticsearchInternalService(context); diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportStartTrainedModelDeploymentAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportStartTrainedModelDeploymentAction.java index e130b13f4ec30..0bda2de2ce9ae 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportStartTrainedModelDeploymentAction.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportStartTrainedModelDeploymentAction.java @@ -234,9 +234,9 @@ protected void masterOperation( if (getModelResponse.getResources().results().size() > 1) { listener.onFailure( ExceptionsHelper.badRequestException( - "cannot deploy more than one models at the same time; [{}] matches [{}] models]", + "cannot deploy more than one model at the same time; [{}] matches models [{}]", request.getModelId(), - getModelResponse.getResources().results().size() + getModelResponse.getResources().results().stream().map(TrainedModelConfig::getModelId).toList() ) ); return; From 
b15c3cc3c436202de36ab82aad0faa98fd646a6e Mon Sep 17 00:00:00 2001 From: David Kyle Date: Mon, 7 Oct 2024 20:15:10 +0100 Subject: [PATCH 2/6] test fixes --- docs/reference/rest-api/usage.asciidoc | 7 ++++++- .../rest-api-spec/test/inference/inference_crud.yml | 11 +++++------ 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/docs/reference/rest-api/usage.asciidoc b/docs/reference/rest-api/usage.asciidoc index 957f57ffc9105..5fd2304ff9378 100644 --- a/docs/reference/rest-api/usage.asciidoc +++ b/docs/reference/rest-api/usage.asciidoc @@ -210,7 +210,12 @@ GET /_xpack/usage "service": "elasticsearch", "task_type": "SPARSE_EMBEDDING", "count": 1 - } + }, + { + "service": "elasticsearch", + "task_type": "TEXT_EMBEDDING", + "count": 1 + } ] }, "logstash" : { diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/inference/inference_crud.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/inference/inference_crud.yml index 11be68cc764e2..fdabf27906c52 100644 --- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/inference/inference_crud.yml +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/inference/inference_crud.yml @@ -44,18 +44,17 @@ - do: inference.get: inference_id: "*" - - length: { endpoints: 1} - - match: { endpoints.0.inference_id: ".elser-2" } + - length: { endpoints: 2} + - match: { endpoints.0.inference_id: ".default-multilingual-e5-small" } + - match: { endpoints.1.inference_id: ".elser-2" } - do: inference.get: inference_id: _all - - length: { endpoints: 1} - - match: { endpoints.0.inference_id: ".elser-2" } + - length: { endpoints: 2} - do: inference.get: inference_id: "" - - length: { endpoints: 1} - - match: { endpoints.0.inference_id: ".elser-2" } + - length: { endpoints: 2} From e9468c3b3435fd853432540165197f05e8072124 Mon Sep 17 00:00:00 2001 From: David Kyle Date: Sat, 12 Oct 2024 16:23:17 +0100 Subject: [PATCH 3/6] rename ---
.../services/elasticsearch/ElasticsearchInternalService.java | 2 +- .../elasticsearch/ElasticsearchInternalServiceTests.java | 2 +- .../resources/rest-api-spec/test/inference/inference_crud.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalService.java index d8ba352225ee1..7e11ff39d31b8 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalService.java @@ -78,7 +78,7 @@ public class ElasticsearchInternalService extends BaseElasticsearchInternalServi public static final int EMBEDDING_MAX_BATCH_SIZE = 10; public static final String DEFAULT_ELSER_ID = ".elser-2"; - public static final String DEFAULT_E5_ID = ".default-multilingual-e5-small"; // TODO what to name this + public static final String DEFAULT_E5_ID = ".multi-e5-small"; private static final Logger logger = LogManager.getLogger(ElasticsearchInternalService.class); private static final DeprecationLogger DEPRECATION_LOGGER = DeprecationLogger.getLogger(ElasticsearchInternalService.class); diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalServiceTests.java index f891b6018aace..1729d99086e79 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalServiceTests.java +++ 
b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalServiceTests.java @@ -1544,7 +1544,7 @@ public void testEmbeddingTypeFromTaskTypeAndSettings() { public void testIsDefaultId() { var service = createService(mock(Client.class)); assertTrue(service.isDefaultId(".elser-2")); - assertTrue(service.isDefaultId(".default-multilingual-e5-small")); // TODO name? + assertTrue(service.isDefaultId(".multi-e5-small")); assertFalse(service.isDefaultId("foo")); } diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/inference/inference_crud.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/inference/inference_crud.yml index fdabf27906c52..3e59fe3622ec3 100644 --- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/inference/inference_crud.yml +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/inference/inference_crud.yml @@ -45,7 +45,7 @@ inference.get: inference_id: "*" - length: { endpoints: 2} - - match: { endpoints.0.inference_id: ".default-multilingual-e5-small" } + - match: { endpoints.0.inference_id: ".multi-e5-small" } - match: { endpoints.1.inference_id: ".elser-2" } - do: From 626356a16d856d0614b78bfd54bc0d20743839f2 Mon Sep 17 00:00:00 2001 From: David Kyle Date: Sun, 13 Oct 2024 08:47:18 +0100 Subject: [PATCH 4/6] Update docs/changelog/114683.yaml --- docs/changelog/114683.yaml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 docs/changelog/114683.yaml diff --git a/docs/changelog/114683.yaml b/docs/changelog/114683.yaml new file mode 100644 index 0000000000000..a677e65a12b0e --- /dev/null +++ b/docs/changelog/114683.yaml @@ -0,0 +1,5 @@ +pr: 114683 +summary: Default inference endpoint for the multilingual-e5-small model +area: Machine Learning +type: enhancement +issues: [] From 859f6b194f3cc110060e73ffae40499c8e75ff85 Mon Sep 17 00:00:00 2001 From: David Kyle Date: Mon, 14 Oct 2024 22:01:46 +0100 Subject: [PATCH 5/6] fix model id 
check --- .../BaseElasticsearchInternalService.java | 7 +++- .../ElasticsearchInternalService.java | 41 ++++++++----------- 2 files changed, 23 insertions(+), 25 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/BaseElasticsearchInternalService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/BaseElasticsearchInternalService.java index bbfe3eb53f7c9..98777e9722242 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/BaseElasticsearchInternalService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/BaseElasticsearchInternalService.java @@ -239,7 +239,12 @@ private void preferredVariantFromPlatformArchitecture(ActionListener defaultConfigIds() { - return List.of(new DefaultConfigId(DEFAULT_ELSER_ID, TaskType.SPARSE_EMBEDDING, this)); + return List.of( + new DefaultConfigId(DEFAULT_ELSER_ID, TaskType.SPARSE_EMBEDDING, this), + new DefaultConfigId(DEFAULT_E5_ID, TaskType.TEXT_EMBEDDING, this) + ); } /** @@ -839,8 +821,19 @@ private List defaultConfigs(boolean useLinuxOptimizedModel) { ElserMlNodeTaskSettings.DEFAULT, null // default chunking settings ); - - return List.of(defaultElser); + var defaultE5 = new MultilingualE5SmallModel( + DEFAULT_E5_ID, + TaskType.TEXT_EMBEDDING, + NAME, + new MultilingualE5SmallInternalServiceSettings( + null, + 1, + useLinuxOptimizedModel ? 
MULTILINGUAL_E5_SMALL_MODEL_ID_LINUX_X86 : MULTILINGUAL_E5_SMALL_MODEL_ID, + new AdaptiveAllocationsSettings(Boolean.TRUE, 1, 8) + ), + null // default chunking settings + ); + return List.of(defaultElser, defaultE5); } @Override From 77ecbb43d2c0a0760abf523beaa21128fe31bb2b Mon Sep 17 00:00:00 2001 From: David Kyle Date: Mon, 14 Oct 2024 22:53:35 +0100 Subject: [PATCH 6/6] test --- .../org/elasticsearch/xpack/inference/InferenceCrudIT.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/InferenceCrudIT.java b/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/InferenceCrudIT.java index 98c8d43707219..cbc50c361e3b5 100644 --- a/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/InferenceCrudIT.java +++ b/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/InferenceCrudIT.java @@ -40,7 +40,7 @@ public void testCRUD() throws IOException { } var getAllModels = getAllModels(); - int numModels = DefaultElserFeatureFlag.isEnabled() ? 10 : 9; + int numModels = DefaultElserFeatureFlag.isEnabled() ? 11 : 9; assertThat(getAllModels, hasSize(numModels)); var getSparseModels = getModels("_all", TaskType.SPARSE_EMBEDDING); @@ -51,7 +51,8 @@ public void testCRUD() throws IOException { } var getDenseModels = getModels("_all", TaskType.TEXT_EMBEDDING); - assertThat(getDenseModels, hasSize(4)); + int numDenseModels = DefaultElserFeatureFlag.isEnabled() ? 5 : 4; + assertThat(getDenseModels, hasSize(numDenseModels)); for (var denseModel : getDenseModels) { assertEquals("text_embedding", denseModel.get("task_type")); }