From c40aab29c99579d126e144bdacf343fd9e31f0df Mon Sep 17 00:00:00 2001 From: Jonathan Buttner Date: Fri, 1 Aug 2025 17:40:22 -0400 Subject: [PATCH 1/7] Working on tests --- .../integration/InferenceIndicesIT.java | 241 ++++++++++++++++++ .../xpack/inference/InferenceIndex.java | 17 +- .../xpack/inference/InferencePlugin.java | 14 +- .../inference/InferenceSecretsIndex.java | 17 +- .../inference/registry/ModelRegistry.java | 1 + 5 files changed, 280 insertions(+), 10 deletions(-) create mode 100644 x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/InferenceIndicesIT.java diff --git a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/InferenceIndicesIT.java b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/InferenceIndicesIT.java new file mode 100644 index 0000000000000..3c5ae3629d0c2 --- /dev/null +++ b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/InferenceIndicesIT.java @@ -0,0 +1,241 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.inference.integration; + +import org.elasticsearch.action.ActionFuture; +import org.elasticsearch.action.search.SearchPhaseExecutionException; +import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.inference.InferenceServiceExtension; +import org.elasticsearch.inference.TaskType; +import org.elasticsearch.license.LicenseSettings; +import org.elasticsearch.license.XPackLicenseState; +import org.elasticsearch.plugins.Plugin; +import org.elasticsearch.test.ESIntegTestCase; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xcontent.XContentFactory; +import org.elasticsearch.xcontent.XContentType; +import org.elasticsearch.xpack.core.LocalStateCompositeXPackPlugin; +import org.elasticsearch.xpack.core.inference.action.GetInferenceModelAction; +import org.elasticsearch.xpack.core.inference.action.PutInferenceModelAction; +import org.elasticsearch.xpack.core.ssl.SSLService; +import org.elasticsearch.xpack.inference.InferenceIndex; +import org.elasticsearch.xpack.inference.InferencePlugin; +import org.elasticsearch.xpack.inference.InferenceSecretsIndex; +import org.elasticsearch.xpack.inference.mock.TestDenseInferenceServiceExtension; +import org.elasticsearch.xpack.inference.mock.TestInferenceServicePlugin; +import org.elasticsearch.xpack.inference.mock.TestSparseInferenceServiceExtension; +import org.junit.After; + +import java.io.IOException; +import java.nio.file.Path; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import static org.hamcrest.CoreMatchers.containsString; +import static org.hamcrest.CoreMatchers.equalTo; + +@ESTestCase.WithoutEntitlements // due to dependency issue ES-12435 +public class InferenceIndicesIT extends ESIntegTestCase { + + private static final String INDEX_ROUTER_ATTRIBUTE = "node.attr.index_router"; + private static final String CONFIG_ROUTER = "config"; + private static final String SECRETS_ROUTER = "secrets"; + + private static final Map BBQ_COMPATIBLE_SERVICE_SETTINGS = Map.of( + "model", + "my_model", + "dimensions", + 256, + "similarity", + "cosine", + "api_key", + "my_api_key" + ); + + private final Map inferenceIds = new HashMap<>(); + + public static class LocalStateIndexSettingsInferencePlugin extends LocalStateCompositeXPackPlugin { + private final InferencePlugin inferencePlugin; + + public LocalStateIndexSettingsInferencePlugin(final Settings settings, final Path configPath) throws Exception { + super(settings, configPath); + var thisVar = this; + this.inferencePlugin = new InferencePlugin(settings) { + @Override + protected SSLService getSslService() { + return thisVar.getSslService(); + } + + @Override + protected XPackLicenseState getLicenseState() { + return thisVar.getLicenseState(); + } + + @Override + public List getInferenceServiceFactories() { + return List.of( + TestSparseInferenceServiceExtension.TestInferenceService::new, + TestDenseInferenceServiceExtension.TestInferenceService::new + ); + } + + @Override + public Settings getIndexSettings() { + return InferenceIndex.settingsForTesting( + Settings.builder().put("index.routing.allocation.require.index_router", "config").build() + ); + } + + @Override + public Settings getSecretsIndexSettings() { + return InferenceSecretsIndex.settingsForTesting( + Settings.builder().put("index.routing.allocation.require.index_router", "secrets").build() + ); + } + }; + plugins.add(inferencePlugin); + } + + } + + @After + public void cleanUp() { + // for (var entry : inferenceIds.entrySet()) { + // assertAcked( + // safeGet( + // client().execute( + // DeleteInferenceEndpointAction.INSTANCE, + // new DeleteInferenceEndpointAction.Request(entry.getKey(), entry.getValue(), true, false) + // ) + // ) + // ); + // } + } + + @Override + protected Settings nodeSettings(int nodeOrdinal, Settings otherSettings) { + return Settings.builder().put(LicenseSettings.SELF_GENERATED_LICENSE_TYPE.getKey(), "trial").build(); + } + + @Override + protected Collection> nodePlugins() { + return List.of(LocalStateIndexSettingsInferencePlugin.class, TestInferenceServicePlugin.class); + } + + public void testRetrievingInferenceEndpoint_ThrowsException_WhenIndexNodeIsNotAvailable() throws Exception { + final Settings configIndexNodeAttributes = Settings.builder().put(INDEX_ROUTER_ATTRIBUTE, CONFIG_ROUTER).build(); + + internalCluster().startMasterOnlyNode(configIndexNodeAttributes); + final String configIndexDataNodes = internalCluster().startDataOnlyNode(configIndexNodeAttributes); + + internalCluster().startDataOnlyNode(Settings.builder().put(INDEX_ROUTER_ATTRIBUTE, SECRETS_ROUTER).build()); + + final String inferenceId = "test-index-id"; + createInferenceEndpoint(TaskType.TEXT_EMBEDDING, inferenceId, BBQ_COMPATIBLE_SERVICE_SETTINGS); + + // Ensure the inference indices are created and we can retrieve the inference endpoint + GetInferenceModelAction.Request getInferenceEndpointRequest = new GetInferenceModelAction.Request( + inferenceId, + TaskType.TEXT_EMBEDDING, + true + ); + var responseFuture = client().execute(GetInferenceModelAction.INSTANCE, getInferenceEndpointRequest); + assertThat(responseFuture.actionGet(TEST_REQUEST_TIMEOUT).getEndpoints().get(0).getInferenceEntityId(), equalTo(inferenceId)); + + // stop the node that holds the inference index + internalCluster().stopNode(configIndexDataNodes); + + var responseFailureFuture = client().execute(GetInferenceModelAction.INSTANCE, getInferenceEndpointRequest); + var exception = expectThrows(SearchPhaseExecutionException.class, () -> responseFailureFuture.actionGet(TEST_REQUEST_TIMEOUT)); + + assertThat(exception.toString(), containsString("all shards failed")); + assertThat(exception.toString(), containsString("Node not connected")); + assertThat(exception.toString(), containsString(".inference")); + } + + public void testRetrievingInferenceEndpoint_ThrowsException_WhenSecretsIndexNodeIsNotAvailable() throws Exception { + final Settings configIndexNodeAttributes = Settings.builder().put(INDEX_ROUTER_ATTRIBUTE, CONFIG_ROUTER).build(); + internalCluster().startMasterOnlyNode(configIndexNodeAttributes); + internalCluster().startDataOnlyNode(configIndexNodeAttributes); + + String secretIndexDataNodes = internalCluster().startDataOnlyNode( + Settings.builder().put(INDEX_ROUTER_ATTRIBUTE, SECRETS_ROUTER).build() + ); + + final String inferenceId = "test-secrets-index-id"; + createInferenceEndpoint(TaskType.TEXT_EMBEDDING, inferenceId, BBQ_COMPATIBLE_SERVICE_SETTINGS); + + // Ensure the inference indices are created and we can retrieve the inference endpoint + GetInferenceModelAction.Request getInferenceEndpointRequest = new GetInferenceModelAction.Request( + inferenceId, + TaskType.TEXT_EMBEDDING, + true + ); + var responseFuture = client().execute(GetInferenceModelAction.INSTANCE, getInferenceEndpointRequest); + assertThat(responseFuture.actionGet(TEST_REQUEST_TIMEOUT).getEndpoints().get(0).getInferenceEntityId(), equalTo(inferenceId)); + + // stop the node that holds the inference secrets index + internalCluster().stopNode(secretIndexDataNodes); + + // We should not be able to create a new inference endpoint because the secrets index is not available + final String inferenceIdFailing = "test-secrets-index-id2"; + var responseFailureFuture = createInferenceEndpointAsync( + TaskType.TEXT_EMBEDDING, + inferenceIdFailing, + BBQ_COMPATIBLE_SERVICE_SETTINGS + ); + var exception = expectThrows(SearchPhaseExecutionException.class, () -> responseFailureFuture.actionGet(TEST_REQUEST_TIMEOUT)); + + assertThat(exception.toString(), containsString("all shards failed")); + assertThat(exception.toString(), containsString("Node not connected")); + assertThat(exception.toString(), containsString(".inference-secrets")); + } + + private void createInferenceEndpoint(TaskType taskType, String inferenceId, Map serviceSettings) throws IOException { + var responseFuture = createInferenceEndpointAsync(taskType, inferenceId, serviceSettings); + assertThat(responseFuture.actionGet(TEST_REQUEST_TIMEOUT).getModel().getInferenceEntityId(), equalTo(inferenceId)); + + inferenceIds.put(inferenceId, taskType); + } + + private ActionFuture createInferenceEndpointAsync( + TaskType taskType, + String inferenceId, + Map serviceSettings + ) throws IOException { + final String service = switch (taskType) { + case TEXT_EMBEDDING -> TestDenseInferenceServiceExtension.TestInferenceService.NAME; + case SPARSE_EMBEDDING -> TestSparseInferenceServiceExtension.TestInferenceService.NAME; + default -> throw new IllegalArgumentException("Unhandled task type [" + taskType + "]"); + }; + + final BytesReference content; + try (XContentBuilder builder = XContentFactory.jsonBuilder()) { + builder.startObject(); + builder.field("service", service); + builder.field("service_settings", serviceSettings); + builder.endObject(); + + content = BytesReference.bytes(builder); + } + + PutInferenceModelAction.Request request = new PutInferenceModelAction.Request( + taskType, + inferenceId, + content, + XContentType.JSON, + TEST_REQUEST_TIMEOUT + ); + + return client().execute(PutInferenceModelAction.INSTANCE, request); + } +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceIndex.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceIndex.java index 1c93494d78636..3ae189c5de7b8 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceIndex.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceIndex.java @@ -30,10 +30,19 @@ private InferenceIndex() {} private static final int INDEX_MAPPING_VERSION = 2; public static Settings settings() { - return Settings.builder() - .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) - .put(IndexMetadata.SETTING_AUTO_EXPAND_REPLICAS, "0-1") - .build(); + return builder().build(); + } + + private static Settings.Builder builder() { + return Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1).put(IndexMetadata.SETTING_AUTO_EXPAND_REPLICAS, "0-1"); + } + + /** + * This allows tests to set additional settings for the inference index. + * @param extraSettings additional settings to apply to the inference index. + */ + public static Settings settingsForTesting(Settings extraSettings) { + return builder().put(extraSettings).build(); } /** diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferencePlugin.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferencePlugin.java index 1a2de4cc6b31f..c3ae4f0d9d6d6 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferencePlugin.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferencePlugin.java @@ -466,7 +466,7 @@ public Collection getSystemIndexDescriptors(Settings sett .setPrimaryIndex(InferenceIndex.INDEX_NAME) .setDescription("Contains inference service and model configuration") .setMappings(InferenceIndex.mappings()) - .setSettings(InferenceIndex.settings()) + .setSettings(getIndexSettings()) .setOrigin(ClientHelper.INFERENCE_ORIGIN) .setPriorSystemIndexDescriptors(List.of(inferenceIndexV1Descriptor)) .build(), @@ -476,13 +476,23 @@ public Collection getSystemIndexDescriptors(Settings sett .setPrimaryIndex(InferenceSecretsIndex.INDEX_NAME) .setDescription("Contains inference service secrets") .setMappings(InferenceSecretsIndex.mappings()) - .setSettings(InferenceSecretsIndex.settings()) + .setSettings(getSecretsIndexSettings()) .setOrigin(ClientHelper.INFERENCE_ORIGIN) .setNetNew() .build() ); } + // Overridable for tests + protected Settings getIndexSettings() { + return InferenceIndex.settings(); + } + + // Overridable for tests + protected Settings getSecretsIndexSettings() { + return InferenceSecretsIndex.settings(); + } + @Override public List> getExecutorBuilders(Settings settingsToUse) { return List.of(inferenceUtilityExecutor(settings)); diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceSecretsIndex.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceSecretsIndex.java index f11864eb9f068..31f78d9c21f7a 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceSecretsIndex.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceSecretsIndex.java @@ -29,10 +29,19 @@ private InferenceSecretsIndex() {} private static final int INDEX_MAPPING_VERSION = 1; public static Settings settings() { - return Settings.builder() - .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) - .put(IndexMetadata.SETTING_AUTO_EXPAND_REPLICAS, "0-1") - .build(); + return builder().build(); + } + + private static Settings.Builder builder() { + return Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1).put(IndexMetadata.SETTING_AUTO_EXPAND_REPLICAS, "0-1"); + } + + /** + * This allows tests to set additional settings for the inference index. + * @param extraSettings additional settings to apply to the inference index. + */ + public static Settings settingsForTesting(Settings extraSettings) { + return builder().put(extraSettings).build(); } /** diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/registry/ModelRegistry.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/registry/ModelRegistry.java index 37a82b2160595..3102262e55363 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/registry/ModelRegistry.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/registry/ModelRegistry.java @@ -268,6 +268,7 @@ public void getModelWithSecrets(String inferenceEntityId, ActionListener Date: Fri, 1 Aug 2025 17:42:45 -0400 Subject: [PATCH 2/7] Update docs/changelog/132362.yaml --- docs/changelog/132362.yaml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 docs/changelog/132362.yaml diff --git a/docs/changelog/132362.yaml b/docs/changelog/132362.yaml new file mode 100644 index 0000000000000..8cdf915346136 --- /dev/null +++ b/docs/changelog/132362.yaml @@ -0,0 +1,5 @@ +pr: 132362 +summary: Inference API disable partial search results +area: Machine Learning +type: bug +issues: [] From 796d94e7270efde375d666c0ecbd2a58de58f5ed Mon Sep 17 00:00:00 2001 From: Jonathan Buttner Date: Mon, 4 Aug 2025 14:12:42 -0400 Subject: [PATCH 3/7] Adding integration test --- .../integration/InferenceIndicesIT.java | 106 +++++++----------- 1 file changed, 43 insertions(+), 63 deletions(-) diff --git a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/InferenceIndicesIT.java b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/InferenceIndicesIT.java index 3c5ae3629d0c2..811a42533aece 100644 --- a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/InferenceIndicesIT.java +++ b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/InferenceIndicesIT.java @@ -11,6 +11,7 @@ import org.elasticsearch.action.search.SearchPhaseExecutionException; import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.core.TimeValue; import org.elasticsearch.inference.InferenceServiceExtension; import org.elasticsearch.inference.TaskType; import org.elasticsearch.license.LicenseSettings; @@ -22,7 +23,10 @@ import org.elasticsearch.xcontent.XContentFactory; import org.elasticsearch.xcontent.XContentType; import org.elasticsearch.xpack.core.LocalStateCompositeXPackPlugin; +import org.elasticsearch.xpack.core.inference.InferenceContext; import org.elasticsearch.xpack.core.inference.action.GetInferenceModelAction; +import org.elasticsearch.xpack.core.inference.action.InferenceAction; +import org.elasticsearch.xpack.core.inference.action.InferenceActionProxy; import org.elasticsearch.xpack.core.inference.action.PutInferenceModelAction; import org.elasticsearch.xpack.core.ssl.SSLService; import org.elasticsearch.xpack.inference.InferenceIndex; @@ -31,12 +35,10 @@ import org.elasticsearch.xpack.inference.mock.TestDenseInferenceServiceExtension; import org.elasticsearch.xpack.inference.mock.TestInferenceServicePlugin; import org.elasticsearch.xpack.inference.mock.TestSparseInferenceServiceExtension; -import org.junit.After; import java.io.IOException; import java.nio.file.Path; import java.util.Collection; -import java.util.HashMap; import java.util.List; import java.util.Map; @@ -50,7 +52,7 @@ public class InferenceIndicesIT extends ESIntegTestCase { private static final String CONFIG_ROUTER = "config"; private static final String SECRETS_ROUTER = "secrets"; - private static final Map BBQ_COMPATIBLE_SERVICE_SETTINGS = Map.of( + private static final Map TEST_SERVICE_SETTINGS = Map.of( "model", "my_model", "dimensions", @@ -61,8 +63,6 @@ public class InferenceIndicesIT extends ESIntegTestCase { "my_api_key" ); - private final Map inferenceIds = new HashMap<>(); - public static class LocalStateIndexSettingsInferencePlugin extends LocalStateCompositeXPackPlugin { private final InferencePlugin inferencePlugin; @@ -107,20 +107,6 @@ public Settings getSecretsIndexSettings() { } - @After - public void cleanUp() { - // for (var entry : inferenceIds.entrySet()) { - // assertAcked( - // safeGet( - // client().execute( - // DeleteInferenceEndpointAction.INSTANCE, - // new DeleteInferenceEndpointAction.Request(entry.getKey(), entry.getValue(), true, false) - // ) - // ) - // ); - // } - } - @Override protected Settings nodeSettings(int nodeOrdinal, Settings otherSettings) { return Settings.builder().put(LicenseSettings.SELF_GENERATED_LICENSE_TYPE.getKey(), "trial").build(); @@ -132,22 +118,18 @@ protected Collection> nodePlugins() { } public void testRetrievingInferenceEndpoint_ThrowsException_WhenIndexNodeIsNotAvailable() throws Exception { - final Settings configIndexNodeAttributes = Settings.builder().put(INDEX_ROUTER_ATTRIBUTE, CONFIG_ROUTER).build(); + final var configIndexNodeAttributes = Settings.builder().put(INDEX_ROUTER_ATTRIBUTE, CONFIG_ROUTER).build(); internalCluster().startMasterOnlyNode(configIndexNodeAttributes); - final String configIndexDataNodes = internalCluster().startDataOnlyNode(configIndexNodeAttributes); + final var configIndexDataNodes = internalCluster().startDataOnlyNode(configIndexNodeAttributes); internalCluster().startDataOnlyNode(Settings.builder().put(INDEX_ROUTER_ATTRIBUTE, SECRETS_ROUTER).build()); - final String inferenceId = "test-index-id"; - createInferenceEndpoint(TaskType.TEXT_EMBEDDING, inferenceId, BBQ_COMPATIBLE_SERVICE_SETTINGS); + final var inferenceId = "test-index-id"; + createInferenceEndpoint(TaskType.TEXT_EMBEDDING, inferenceId, TEST_SERVICE_SETTINGS); // Ensure the inference indices are created and we can retrieve the inference endpoint - GetInferenceModelAction.Request getInferenceEndpointRequest = new GetInferenceModelAction.Request( - inferenceId, - TaskType.TEXT_EMBEDDING, - true - ); + var getInferenceEndpointRequest = new GetInferenceModelAction.Request(inferenceId, TaskType.TEXT_EMBEDDING, true); var responseFuture = client().execute(GetInferenceModelAction.INSTANCE, getInferenceEndpointRequest); assertThat(responseFuture.actionGet(TEST_REQUEST_TIMEOUT).getEndpoints().get(0).getInferenceEntityId(), equalTo(inferenceId)); @@ -163,48 +145,59 @@ public void testRetrievingInferenceEndpoint_ThrowsException_WhenIndexNodeIsNotAv } public void testRetrievingInferenceEndpoint_ThrowsException_WhenSecretsIndexNodeIsNotAvailable() throws Exception { - final Settings configIndexNodeAttributes = Settings.builder().put(INDEX_ROUTER_ATTRIBUTE, CONFIG_ROUTER).build(); + final var configIndexNodeAttributes = Settings.builder().put(INDEX_ROUTER_ATTRIBUTE, CONFIG_ROUTER).build(); internalCluster().startMasterOnlyNode(configIndexNodeAttributes); internalCluster().startDataOnlyNode(configIndexNodeAttributes); - String secretIndexDataNodes = internalCluster().startDataOnlyNode( + var secretIndexDataNodes = internalCluster().startDataOnlyNode( Settings.builder().put(INDEX_ROUTER_ATTRIBUTE, SECRETS_ROUTER).build() ); - final String inferenceId = "test-secrets-index-id"; - createInferenceEndpoint(TaskType.TEXT_EMBEDDING, inferenceId, BBQ_COMPATIBLE_SERVICE_SETTINGS); + final var inferenceId = "test-secrets-index-id"; + createInferenceEndpoint(TaskType.TEXT_EMBEDDING, inferenceId, TEST_SERVICE_SETTINGS); // Ensure the inference indices are created and we can retrieve the inference endpoint - GetInferenceModelAction.Request getInferenceEndpointRequest = new GetInferenceModelAction.Request( - inferenceId, - TaskType.TEXT_EMBEDDING, - true - ); + var getInferenceEndpointRequest = new GetInferenceModelAction.Request(inferenceId, TaskType.TEXT_EMBEDDING, true); var responseFuture = client().execute(GetInferenceModelAction.INSTANCE, getInferenceEndpointRequest); assertThat(responseFuture.actionGet(TEST_REQUEST_TIMEOUT).getEndpoints().get(0).getInferenceEntityId(), equalTo(inferenceId)); // stop the node that holds the inference secrets index internalCluster().stopNode(secretIndexDataNodes); - // We should not be able to create a new inference endpoint because the secrets index is not available - final String inferenceIdFailing = "test-secrets-index-id2"; - var responseFailureFuture = createInferenceEndpointAsync( + var proxyResponse = sendInferenceProxyRequest(inferenceId); + var exception = expectThrows(SearchPhaseExecutionException.class, () -> proxyResponse.actionGet(TEST_REQUEST_TIMEOUT)); + + assertThat(exception.toString(), containsString("shards failure")); + assertThat(exception.toString(), containsString("Node not connected")); + assertThat(exception.toString(), containsString(".secrets-inference")); + } + + private ActionFuture sendInferenceProxyRequest(String inferenceId) throws IOException { + final BytesReference content; + try (XContentBuilder builder = XContentFactory.jsonBuilder()) { + builder.startObject(); + builder.field("input", List.of("test input")); + builder.endObject(); + + content = BytesReference.bytes(builder); + } + + var inferenceRequest = new InferenceActionProxy.Request( TaskType.TEXT_EMBEDDING, - inferenceIdFailing, - BBQ_COMPATIBLE_SERVICE_SETTINGS + inferenceId, + content, + XContentType.JSON, + TimeValue.THIRTY_SECONDS, + false, + InferenceContext.EMPTY_INSTANCE ); - var exception = expectThrows(SearchPhaseExecutionException.class, () -> responseFailureFuture.actionGet(TEST_REQUEST_TIMEOUT)); - assertThat(exception.toString(), containsString("all shards failed")); - assertThat(exception.toString(), containsString("Node not connected")); - assertThat(exception.toString(), containsString(".inference-secrets")); + return client().execute(InferenceActionProxy.INSTANCE, inferenceRequest); } private void createInferenceEndpoint(TaskType taskType, String inferenceId, Map serviceSettings) throws IOException { var responseFuture = createInferenceEndpointAsync(taskType, inferenceId, serviceSettings); assertThat(responseFuture.actionGet(TEST_REQUEST_TIMEOUT).getModel().getInferenceEntityId(), equalTo(inferenceId)); - - inferenceIds.put(inferenceId, taskType); } private ActionFuture createInferenceEndpointAsync( @@ -212,30 +205,17 @@ private ActionFuture createInferenceEndpointAs String inferenceId, Map serviceSettings ) throws IOException { - final String service = switch (taskType) { - case TEXT_EMBEDDING -> TestDenseInferenceServiceExtension.TestInferenceService.NAME; - case SPARSE_EMBEDDING -> TestSparseInferenceServiceExtension.TestInferenceService.NAME; - default -> throw new IllegalArgumentException("Unhandled task type [" + taskType + "]"); - }; - final BytesReference content; try (XContentBuilder builder = XContentFactory.jsonBuilder()) { builder.startObject(); - builder.field("service", service); + builder.field("service", TestDenseInferenceServiceExtension.TestInferenceService.NAME); builder.field("service_settings", serviceSettings); builder.endObject(); content = BytesReference.bytes(builder); } - PutInferenceModelAction.Request request = new PutInferenceModelAction.Request( - taskType, - inferenceId, - content, - XContentType.JSON, - TEST_REQUEST_TIMEOUT - ); - + var request = new PutInferenceModelAction.Request(taskType, inferenceId, content, XContentType.JSON, TEST_REQUEST_TIMEOUT); return client().execute(PutInferenceModelAction.INSTANCE, request); } } From 926c4fe47b1df9070dc8a07d502dddccaac42078 Mon Sep 17 00:00:00 2001 From: Jonathan Buttner Date: Mon, 4 Aug 2025 15:40:16 -0400 Subject: [PATCH 4/7] Wrapping exception --- .../integration/InferenceIndicesIT.java | 50 ++++++++++++++++--- .../inference/registry/ModelRegistry.java | 28 ++++++++--- 2 files changed, 64 insertions(+), 14 deletions(-) diff --git a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/InferenceIndicesIT.java b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/InferenceIndicesIT.java index 811a42533aece..4b1ab587dd983 100644 --- a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/InferenceIndicesIT.java +++ b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/InferenceIndicesIT.java @@ -7,6 +7,7 @@ package org.elasticsearch.xpack.inference.integration; +import org.elasticsearch.ElasticsearchException; import org.elasticsearch.action.ActionFuture; import org.elasticsearch.action.search.SearchPhaseExecutionException; import org.elasticsearch.common.bytes.BytesReference; @@ -44,6 +45,7 @@ import static org.hamcrest.CoreMatchers.containsString; import static org.hamcrest.CoreMatchers.equalTo; +import static org.hamcrest.Matchers.instanceOf; @ESTestCase.WithoutEntitlements // due to dependency issue ES-12435 public class InferenceIndicesIT extends ESIntegTestCase { @@ -137,11 +139,40 @@ public void testRetrievingInferenceEndpoint_ThrowsException_WhenIndexNodeIsNotAv internalCluster().stopNode(configIndexDataNodes); var responseFailureFuture = client().execute(GetInferenceModelAction.INSTANCE, getInferenceEndpointRequest); - var exception = expectThrows(SearchPhaseExecutionException.class, () -> responseFailureFuture.actionGet(TEST_REQUEST_TIMEOUT)); + var exception = expectThrows(ElasticsearchException.class, () -> responseFailureFuture.actionGet(TEST_REQUEST_TIMEOUT)); + assertThat(exception.toString(), containsString("Failed to load inference endpoint [test-index-id]")); - assertThat(exception.toString(), containsString("all shards failed")); - assertThat(exception.toString(), containsString("Node not connected")); - assertThat(exception.toString(), containsString(".inference")); + var causeException = exception.getCause(); + assertThat(causeException, instanceOf(SearchPhaseExecutionException.class)); + assertThat(causeException.toString(), containsString(".inference")); + } + + public void testRetrievingInferenceEndpoint_ThrowsException_WhenIndexNodeIsNotAvailable_ForInferenceAction() throws Exception { + final var configIndexNodeAttributes = Settings.builder().put(INDEX_ROUTER_ATTRIBUTE, CONFIG_ROUTER).build(); + + internalCluster().startMasterOnlyNode(configIndexNodeAttributes); + final var configIndexDataNodes = internalCluster().startDataOnlyNode(configIndexNodeAttributes); + + internalCluster().startDataOnlyNode(Settings.builder().put(INDEX_ROUTER_ATTRIBUTE, SECRETS_ROUTER).build()); + + final var inferenceId = "test-index-id-2"; + createInferenceEndpoint(TaskType.TEXT_EMBEDDING, inferenceId, TEST_SERVICE_SETTINGS); + + // Ensure the inference indices are created and we can retrieve the inference endpoint + var getInferenceEndpointRequest = new GetInferenceModelAction.Request(inferenceId, TaskType.TEXT_EMBEDDING, true); + var responseFuture = client().execute(GetInferenceModelAction.INSTANCE, getInferenceEndpointRequest); + assertThat(responseFuture.actionGet(TEST_REQUEST_TIMEOUT).getEndpoints().get(0).getInferenceEntityId(), equalTo(inferenceId)); + + // stop the node that holds the inference index + internalCluster().stopNode(configIndexDataNodes); + + var proxyResponse = sendInferenceProxyRequest(inferenceId); + var exception = expectThrows(ElasticsearchException.class, () -> proxyResponse.actionGet(TEST_REQUEST_TIMEOUT)); + assertThat(exception.toString(), containsString("Failed to load inference endpoint [test-index-id-2]")); + + var causeException = exception.getCause(); + assertThat(causeException, instanceOf(SearchPhaseExecutionException.class)); + assertThat(causeException.toString(), containsString(".inference")); } public void testRetrievingInferenceEndpoint_ThrowsException_WhenSecretsIndexNodeIsNotAvailable() throws Exception { @@ -165,11 +196,14 @@ public void testRetrievingInferenceEndpoint_ThrowsException_WhenSecretsIndexNode internalCluster().stopNode(secretIndexDataNodes); var proxyResponse = sendInferenceProxyRequest(inferenceId); - var exception = expectThrows(SearchPhaseExecutionException.class, () -> proxyResponse.actionGet(TEST_REQUEST_TIMEOUT)); - assertThat(exception.toString(), containsString("shards failure")); - assertThat(exception.toString(), containsString("Node not connected")); - assertThat(exception.toString(), containsString(".secrets-inference")); + var exception = expectThrows(ElasticsearchException.class, () -> proxyResponse.actionGet(TEST_REQUEST_TIMEOUT)); + assertThat(exception.toString(), containsString("Failed to load inference endpoint [test-secrets-index-id]")); + + var causeException = exception.getCause(); + + assertThat(causeException, instanceOf(SearchPhaseExecutionException.class)); + assertThat(causeException.toString(), containsString(".secrets-inference")); } private ActionFuture sendInferenceProxyRequest(String inferenceId) throws IOException { diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/registry/ModelRegistry.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/registry/ModelRegistry.java index 3102262e55363..1377632015ce2 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/registry/ModelRegistry.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/registry/ModelRegistry.java @@ -249,19 +249,27 @@ public MinimalServiceSettings getMinimalServiceSettings(String inferenceEntityId * @param listener Model listener */ public void getModelWithSecrets(String inferenceEntityId, ActionListener listener) { - ActionListener searchListener = listener.delegateFailureAndWrap((delegate, searchResponse) -> { + ActionListener searchListener = ActionListener.wrap((searchResponse) -> { // There should be a hit for the configurations if (searchResponse.getHits().getHits().length == 0) { var maybeDefault = defaultConfigIds.get(inferenceEntityId); if (maybeDefault != null) { getDefaultConfig(true, maybeDefault, listener); } else { - delegate.onFailure(inferenceNotFoundException(inferenceEntityId)); + listener.onFailure(inferenceNotFoundException(inferenceEntityId)); } return; } - delegate.onResponse(unparsedModelFromMap(createModelConfigMap(searchResponse.getHits(), inferenceEntityId))); + listener.onResponse(unparsedModelFromMap(createModelConfigMap(searchResponse.getHits(), inferenceEntityId))); + }, (e) -> { + logger.warn(format("Failed to load inference endpoint [%s]", inferenceEntityId), e); + listener.onFailure( + new ElasticsearchException( + format("Failed to load inference endpoint [%s], error: [%s]", inferenceEntityId, e.getMessage()), + e + ) + ); }); QueryBuilder queryBuilder = documentIdQuery(inferenceEntityId); @@ -281,21 +289,29 @@ public void getModelWithSecrets(String inferenceEntityId, ActionListener listener) { - ActionListener searchListener = listener.delegateFailureAndWrap((delegate, searchResponse) -> { + ActionListener searchListener = ActionListener.wrap((searchResponse) -> { // There should be a hit for the configurations if (searchResponse.getHits().getHits().length == 0) { var maybeDefault = defaultConfigIds.get(inferenceEntityId); if (maybeDefault != null) { getDefaultConfig(true, maybeDefault, listener); } else { - delegate.onFailure(inferenceNotFoundException(inferenceEntityId)); + listener.onFailure(inferenceNotFoundException(inferenceEntityId)); } return; } var modelConfigs = parseHitsAsModels(searchResponse.getHits()).stream().map(ModelRegistry::unparsedModelFromMap).toList(); assert modelConfigs.size() == 1; - delegate.onResponse(modelConfigs.get(0)); + listener.onResponse(modelConfigs.get(0)); + }, e -> { + logger.warn(format("Failed to load inference endpoint [%s]", inferenceEntityId), e); + listener.onFailure( + new ElasticsearchException( + format("Failed to load inference endpoint [%s], error: [%s]", inferenceEntityId, e.getMessage()), + e + ) + ); }); QueryBuilder queryBuilder = documentIdQuery(inferenceEntityId); From d467eab9f415b06f4052cdd902adb741acd19820 Mon Sep 17 00:00:00 2001 From: Jonathan Buttner Date: Mon, 4 Aug 2025 15:47:51 -0400 Subject: [PATCH 5/7] Fixing flaky tests --- .../xpack/inference/integration/InferenceIndicesIT.java | 6 ++---- .../xpack/inference/registry/ModelRegistry.java | 4 ++-- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/InferenceIndicesIT.java b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/InferenceIndicesIT.java index 4b1ab587dd983..364ff045af633 100644 --- a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/InferenceIndicesIT.java +++ b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/InferenceIndicesIT.java @@ -168,11 +168,10 @@ public void testRetrievingInferenceEndpoint_ThrowsException_WhenIndexNodeIsNotAv var proxyResponse = sendInferenceProxyRequest(inferenceId); var exception = expectThrows(ElasticsearchException.class, () -> proxyResponse.actionGet(TEST_REQUEST_TIMEOUT)); - assertThat(exception.toString(), containsString("Failed to load inference endpoint [test-index-id-2]")); + assertThat(exception.toString(), containsString("Failed to load inference endpoint with secrets [test-index-id-2]")); var causeException = exception.getCause(); assertThat(causeException, instanceOf(SearchPhaseExecutionException.class)); - assertThat(causeException.toString(), containsString(".inference")); } public void testRetrievingInferenceEndpoint_ThrowsException_WhenSecretsIndexNodeIsNotAvailable() throws Exception { @@ -198,12 +197,11 @@ public void testRetrievingInferenceEndpoint_ThrowsException_WhenSecretsIndexNode var proxyResponse = sendInferenceProxyRequest(inferenceId); var exception = expectThrows(ElasticsearchException.class, () -> proxyResponse.actionGet(TEST_REQUEST_TIMEOUT)); - assertThat(exception.toString(), containsString("Failed to load inference endpoint [test-secrets-index-id]")); + assertThat(exception.toString(), containsString("Failed to load inference endpoint with secrets [test-secrets-index-id]")); var causeException = exception.getCause(); assertThat(causeException, instanceOf(SearchPhaseExecutionException.class)); - assertThat(causeException.toString(), containsString(".secrets-inference")); } private ActionFuture sendInferenceProxyRequest(String inferenceId) throws IOException { diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/registry/ModelRegistry.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/registry/ModelRegistry.java index 1377632015ce2..fe7c4a9395cd1 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/registry/ModelRegistry.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/registry/ModelRegistry.java @@ -263,10 +263,10 @@ public void getModelWithSecrets(String inferenceEntityId, ActionListener { - logger.warn(format("Failed to load inference endpoint [%s]", inferenceEntityId), e); + logger.warn(format("Failed to load inference endpoint with secrets [%s]", inferenceEntityId), e); listener.onFailure( new ElasticsearchException( - format("Failed to load inference endpoint [%s], error: [%s]", inferenceEntityId, e.getMessage()), + format("Failed to load inference endpoint with secrets [%s], error: [%s]", inferenceEntityId, e.getMessage()), e ) ); From f0023df9ee7de90b8538ead83473e5bedda1cf50 Mon Sep 17 00:00:00 2001 From: Jonathan Buttner Date: Mon, 4 Aug 2025 15:48:49 -0400 Subject: [PATCH 6/7] Removing assert --- .../xpack/inference/integration/InferenceIndicesIT.java | 1 - 1 file changed, 1 deletion(-) diff --git a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/InferenceIndicesIT.java b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/InferenceIndicesIT.java index 364ff045af633..1085d79d3719b 100644 --- a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/InferenceIndicesIT.java +++ b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/InferenceIndicesIT.java @@ -144,7 +144,6 @@ public void testRetrievingInferenceEndpoint_ThrowsException_WhenIndexNodeIsNotAv var causeException = exception.getCause(); assertThat(causeException, instanceOf(SearchPhaseExecutionException.class)); - assertThat(causeException.toString(), containsString(".inference")); } public void testRetrievingInferenceEndpoint_ThrowsException_WhenIndexNodeIsNotAvailable_ForInferenceAction() throws Exception { From 7c2db2f789bb16313319621c9d1d76d2d9744e5b Mon Sep 17 00:00:00 2001 From: Jonathan Buttner Date: Tue, 5 Aug 2025 10:26:59 -0400 Subject: [PATCH 7/7] Refactoring testing functions --- .../inference/integration/InferenceIndicesIT.java | 12 ++++++------ .../xpack/inference/InferenceIndex.java | 11 ++--------- .../xpack/inference/InferenceSecretsIndex.java | 11 ++--------- 3 files changed, 10 insertions(+), 24 deletions(-) diff --git a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/InferenceIndicesIT.java b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/InferenceIndicesIT.java index 1085d79d3719b..e59f0617851c3 100644 --- a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/InferenceIndicesIT.java +++ b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/InferenceIndicesIT.java @@ -92,16 +92,16 @@ public List getInferenceServiceFactories() { @Override public Settings getIndexSettings() { - return InferenceIndex.settingsForTesting( - Settings.builder().put("index.routing.allocation.require.index_router", "config").build() - ); + return InferenceIndex.builder() + .put(Settings.builder().put("index.routing.allocation.require.index_router", "config").build()) + .build(); } @Override public Settings getSecretsIndexSettings() { - return InferenceSecretsIndex.settingsForTesting( - Settings.builder().put("index.routing.allocation.require.index_router", "secrets").build() - ); + return InferenceSecretsIndex.builder() + .put(Settings.builder().put("index.routing.allocation.require.index_router", "secrets").build()) + .build(); } }; plugins.add(inferencePlugin); diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceIndex.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceIndex.java index 3ae189c5de7b8..eb79fc08bd1a0 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceIndex.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceIndex.java @@ -33,18 +33,11 @@ public static Settings settings() { return builder().build(); } - private static Settings.Builder builder() { + // Public to allow tests to create the index with custom settings + public static Settings.Builder builder() { return Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1).put(IndexMetadata.SETTING_AUTO_EXPAND_REPLICAS, "0-1"); } - /** - * This allows tests to set additional settings for the inference index. - * @param extraSettings additional settings to apply to the inference index. - */ - public static Settings settingsForTesting(Settings extraSettings) { - return builder().put(extraSettings).build(); - } - /** * Reject any unknown fields being added by setting dynamic mappings to * {@code strict} for the top level object. A document that contains unknown diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceSecretsIndex.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceSecretsIndex.java index 31f78d9c21f7a..649dc27e4a493 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceSecretsIndex.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceSecretsIndex.java @@ -32,18 +32,11 @@ public static Settings settings() { return builder().build(); } - private static Settings.Builder builder() { + // Public to allow tests to create the index with custom settings + public static Settings.Builder builder() { return Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1).put(IndexMetadata.SETTING_AUTO_EXPAND_REPLICAS, "0-1"); } - /** - * This allows tests to set additional settings for the inference index. - * @param extraSettings additional settings to apply to the inference index. - */ - public static Settings settingsForTesting(Settings extraSettings) { - return builder().put(extraSettings).build(); - } - /** * Reject any unknown fields being added by setting dynamic mappings to * {@code strict} for the top level object. A document that contains unknown