From a771ccccf15ad0d410dd9abef5fabb2f820d82d5 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Tue, 7 Oct 2025 17:22:45 -0400 Subject: [PATCH 01/15] Added multi-node test module for inference --- .../inference/qa/multi-node/build.gradle | 14 +++++++++ .../integration/SemanticTextEmbeddingsIT.java | 31 +++++++++++++++++++ 2 files changed, 45 insertions(+) create mode 100644 x-pack/plugin/inference/qa/multi-node/build.gradle create mode 100644 x-pack/plugin/inference/qa/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/inference/integration/SemanticTextEmbeddingsIT.java diff --git a/x-pack/plugin/inference/qa/multi-node/build.gradle b/x-pack/plugin/inference/qa/multi-node/build.gradle new file mode 100644 index 0000000000000..e35706c4d8236 --- /dev/null +++ b/x-pack/plugin/inference/qa/multi-node/build.gradle @@ -0,0 +1,14 @@ +apply plugin: 'elasticsearch.internal-java-rest-test' + +dependencies { + javaRestTestImplementation project(xpackModule('core')) + javaRestTestImplementation testArtifact(project(xpackModule('core'))) + javaRestTestImplementation project(xpackModule('inference')) + javaRestTestImplementation testArtifact(project(xpackModule('inference'))) + javaRestTestImplementation project(':modules:reindex') + javaRestTestImplementation project(':x-pack:plugin:inference:qa:test-service-plugin') +} + +tasks.named('javaRestTest') { + usesDefaultDistribution("to be triaged") +} diff --git a/x-pack/plugin/inference/qa/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/inference/integration/SemanticTextEmbeddingsIT.java b/x-pack/plugin/inference/qa/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/inference/integration/SemanticTextEmbeddingsIT.java new file mode 100644 index 0000000000000..86869628f413d --- /dev/null +++ b/x-pack/plugin/inference/qa/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/inference/integration/SemanticTextEmbeddingsIT.java @@ -0,0 +1,31 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.inference.integration; + +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.license.LicenseSettings; +import org.elasticsearch.plugins.Plugin; +import org.elasticsearch.reindex.ReindexPlugin; +import org.elasticsearch.test.ESIntegTestCase; +import org.elasticsearch.xpack.inference.LocalStateInferencePlugin; +import org.elasticsearch.xpack.inference.mock.TestInferenceServicePlugin; + +import java.util.Collection; +import java.util.List; + +public class SemanticTextEmbeddingsIT extends ESIntegTestCase { + @Override + protected Settings nodeSettings(int nodeOrdinal, Settings otherSettings) { + return Settings.builder().put(LicenseSettings.SELF_GENERATED_LICENSE_TYPE.getKey(), "trial").build(); + } + + @Override + protected Collection> nodePlugins() { + return List.of(LocalStateInferencePlugin.class, TestInferenceServicePlugin.class, ReindexPlugin.class); + } +} From 9a70aeb70b6e2e6b66ae0430131dbd2e1d3b9db0 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Wed, 8 Oct 2025 12:01:42 -0400 Subject: [PATCH 02/15] Added test when source is disabled and vectors are included by default --- .../integration/SemanticTextEmbeddingsIT.java | 208 +++++++++++++++++- 1 file changed, 207 insertions(+), 1 deletion(-) diff --git a/x-pack/plugin/inference/qa/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/inference/integration/SemanticTextEmbeddingsIT.java b/x-pack/plugin/inference/qa/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/inference/integration/SemanticTextEmbeddingsIT.java index 86869628f413d..9f546d1fe80d0 100644 --- a/x-pack/plugin/inference/qa/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/inference/integration/SemanticTextEmbeddingsIT.java +++ b/x-pack/plugin/inference/qa/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/inference/integration/SemanticTextEmbeddingsIT.java @@ -7,18 +7,68 @@ package org.elasticsearch.xpack.inference.integration; +import org.elasticsearch.action.DocWriteResponse; +import org.elasticsearch.action.search.SearchRequest; +import org.elasticsearch.action.search.SearchResponse; +import org.elasticsearch.action.support.IndicesOptions; +import org.elasticsearch.cluster.metadata.IndexMetadata; +import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.io.stream.NamedWriteableRegistry; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.IndexVersion; +import org.elasticsearch.index.query.QueryBuilder; +import org.elasticsearch.inference.TaskType; import org.elasticsearch.license.LicenseSettings; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.reindex.ReindexPlugin; +import org.elasticsearch.search.SearchHit; +import org.elasticsearch.search.builder.SearchSourceBuilder; import org.elasticsearch.test.ESIntegTestCase; +import org.elasticsearch.test.index.IndexVersionUtils; +import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xcontent.XContentFactory; +import org.elasticsearch.xcontent.XContentType; +import org.elasticsearch.xpack.core.inference.action.DeleteInferenceEndpointAction; +import org.elasticsearch.xpack.core.inference.action.PutInferenceModelAction; +import org.elasticsearch.xpack.core.ml.inference.MlInferenceNamedXContentProvider; import org.elasticsearch.xpack.inference.LocalStateInferencePlugin; +import org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper; +import org.elasticsearch.xpack.inference.mock.TestDenseInferenceServiceExtension; import org.elasticsearch.xpack.inference.mock.TestInferenceServicePlugin; +import org.elasticsearch.xpack.inference.mock.TestSparseInferenceServiceExtension; +import org.elasticsearch.xpack.inference.queries.SemanticQueryBuilder; +import org.junit.After; +import java.io.IOException; import java.util.Collection; +import java.util.HashMap; import java.util.List; +import java.util.Map; +import java.util.function.Consumer; +import static org.elasticsearch.index.IndexVersions.EXCLUDE_SOURCE_VECTORS_DEFAULT; +import static org.elasticsearch.index.mapper.InferenceMetadataFieldsMapper.USE_NEW_SEMANTIC_TEXT_FORMAT_BY_DEFAULT; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertResponse; +import static org.hamcrest.CoreMatchers.equalTo; + +@ESIntegTestCase.ClusterScope(minNumDataNodes = 3, maxNumDataNodes = 5) public class SemanticTextEmbeddingsIT extends ESIntegTestCase { + private final String indexName = randomIdentifier(); + private final Map inferenceIds = new HashMap<>(); + + private static final Map SPARSE_EMBEDDING_SERVICE_SETTINGS = Map.of("model", "my_model", "api_key", "my_api_key"); + private static final Map TEXT_EMBEDDING_SERVICE_SETTINGS = Map.of( + "model", + "my_model", + "dimensions", + 256, + "similarity", + "cosine", + "api_key", + "my_api_key" + ); + @Override protected Settings nodeSettings(int nodeOrdinal, Settings otherSettings) { return Settings.builder().put(LicenseSettings.SELF_GENERATED_LICENSE_TYPE.getKey(), "trial").build(); @@ -26,6 +76,162 @@ protected Settings nodeSettings(int nodeOrdinal, Settings otherSettings) { @Override protected Collection> nodePlugins() { - return List.of(LocalStateInferencePlugin.class, TestInferenceServicePlugin.class, ReindexPlugin.class); + return List.of(LocalStateInferencePlugin.class, TestInferenceServicePlugin.class, ReindexPlugin.class, FakeMlPlugin.class); + } + + @Override + protected boolean forbidPrivateIndexSettings() { + return false; + } + + @After + public void cleanUp() { + assertAcked( + safeGet( + client().admin() + .indices() + .prepareDelete(indexName) + .setIndicesOptions( + IndicesOptions.builder().concreteTargetOptions(new IndicesOptions.ConcreteTargetOptions(true)).build() + ) + .execute() + ) + ); + + for (var entry : inferenceIds.entrySet()) { + assertAcked( + safeGet( + client().execute( + DeleteInferenceEndpointAction.INSTANCE, + new DeleteInferenceEndpointAction.Request(entry.getKey(), entry.getValue(), true, false) + ) + ) + ); + } + } + + // TODO: Complete test once fix is implemented + public void testSourceDisabledAndIncludeVectors() throws Exception { + // Get a random index version after when we default to the new semantic text format, and before we exclude vectors in source by + // default + final IndexVersion indexVersion = IndexVersionUtils.randomVersionBetween( + random(), + USE_NEW_SEMANTIC_TEXT_FORMAT_BY_DEFAULT, + IndexVersionUtils.getPreviousVersion(EXCLUDE_SOURCE_VECTORS_DEFAULT) + ); + + final String sparseEmbeddingInferenceId = randomIdentifier(); + final String textEmbeddingInferenceId = randomIdentifier(); + createInferenceEndpoint(TaskType.SPARSE_EMBEDDING, sparseEmbeddingInferenceId, SPARSE_EMBEDDING_SERVICE_SETTINGS); + createInferenceEndpoint(TaskType.TEXT_EMBEDDING, textEmbeddingInferenceId, TEXT_EMBEDDING_SERVICE_SETTINGS); + + final String sparseEmbeddingField = randomIdentifier(); + final String textEmbeddingField = randomIdentifier(); + XContentBuilder mappings = generateMapping( + Map.of(sparseEmbeddingField, sparseEmbeddingInferenceId, textEmbeddingField, textEmbeddingInferenceId) + ); + + assertAcked(prepareCreate(indexName).setSettings(generateIndexSettings(indexVersion)).setMapping(mappings)); + indexDocuments(sparseEmbeddingField, 10); + indexDocuments(textEmbeddingField, 10); + + QueryBuilder sparseEmbeddingFieldQuery = new SemanticQueryBuilder(sparseEmbeddingField, randomAlphaOfLength(10)); + assertSearchResponse( + sparseEmbeddingFieldQuery, + 10, + request -> request.source().fetchSource(false).fetchField(sparseEmbeddingField), + response -> { + for (SearchHit hit : response.getHits()) { + hit.getDocumentFields(); + } + } + ); + } + + private void createInferenceEndpoint(TaskType taskType, String inferenceId, Map serviceSettings) throws IOException { + final String service = switch (taskType) { + case TEXT_EMBEDDING -> TestDenseInferenceServiceExtension.TestInferenceService.NAME; + case SPARSE_EMBEDDING -> TestSparseInferenceServiceExtension.TestInferenceService.NAME; + default -> throw new IllegalArgumentException("Unhandled task type [" + taskType + "]"); + }; + + final BytesReference content; + try (XContentBuilder builder = XContentFactory.jsonBuilder()) { + builder.startObject(); + builder.field("service", service); + builder.field("service_settings", serviceSettings); + builder.endObject(); + + content = BytesReference.bytes(builder); + } + + PutInferenceModelAction.Request request = new PutInferenceModelAction.Request( + taskType, + inferenceId, + content, + XContentType.JSON, + TEST_REQUEST_TIMEOUT + ); + var responseFuture = client().execute(PutInferenceModelAction.INSTANCE, request); + assertThat(responseFuture.actionGet(TEST_REQUEST_TIMEOUT).getModel().getInferenceEntityId(), equalTo(inferenceId)); + + inferenceIds.put(inferenceId, taskType); + } + + private Settings generateIndexSettings(IndexVersion indexVersion) { + int numDataNodes = internalCluster().numDataNodes(); + return Settings.builder() + .put(IndexMetadata.SETTING_VERSION_CREATED, indexVersion) + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, numDataNodes) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) + .build(); + } + + private void indexDocuments(String field, int count) { + for (int i = 0; i < count; i++) { + Map source = Map.of(field, randomAlphaOfLength(10)); + DocWriteResponse response = client().prepareIndex(indexName).setSource(source).get(TEST_REQUEST_TIMEOUT); + assertThat(response.getResult(), equalTo(DocWriteResponse.Result.CREATED)); + } + + client().admin().indices().prepareRefresh(indexName).get(); + } + + private void assertSearchResponse( + QueryBuilder queryBuilder, + long expectedHits, + Consumer searchRequestModifier, + Consumer searchResponseValidator + ) throws Exception { + SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder().query(queryBuilder); + SearchRequest searchRequest = new SearchRequest(new String[] { indexName }, searchSourceBuilder); + if (searchRequestModifier != null) { + searchRequestModifier.accept(searchRequest); + } + + assertResponse(client().search(searchRequest), response -> { + assertThat(response.getHits().getTotalHits().value(), equalTo(expectedHits)); + searchResponseValidator.accept(response); + }); + } + + private static XContentBuilder generateMapping(Map semanticTextFields) throws IOException { + XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject("properties"); + for (var entry : semanticTextFields.entrySet()) { + mapping.startObject(entry.getKey()); + mapping.field("type", SemanticTextFieldMapper.CONTENT_TYPE); + mapping.field("inference_id", entry.getValue()); + mapping.endObject(); + } + mapping.endObject().endObject(); + + return mapping; + } + + public static class FakeMlPlugin extends Plugin { + @Override + public List getNamedWriteables() { + return new MlInferenceNamedXContentProvider().getNamedWriteables(); + } } } From 7eb7cde030671995228a468d2c9d7690d439a9e4 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Wed, 8 Oct 2025 12:51:40 -0400 Subject: [PATCH 03/15] Move integration test to inference plugin --- .../xpack/inference/integration/SemanticTextEmbeddingsIT.java | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename x-pack/plugin/inference/{qa/multi-node/src/javaRestTest => src/internalClusterTest}/java/org/elasticsearch/xpack/inference/integration/SemanticTextEmbeddingsIT.java (100%) diff --git a/x-pack/plugin/inference/qa/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/inference/integration/SemanticTextEmbeddingsIT.java b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/SemanticTextEmbeddingsIT.java similarity index 100% rename from x-pack/plugin/inference/qa/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/inference/integration/SemanticTextEmbeddingsIT.java rename to x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/SemanticTextEmbeddingsIT.java From 9a9c684bc90510dd687cc9bab1828832b52176f2 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Wed, 8 Oct 2025 12:53:02 -0400 Subject: [PATCH 04/15] Remove multi-node module --- x-pack/plugin/inference/qa/multi-node/build.gradle | 14 -------------- 1 file changed, 14 deletions(-) delete mode 100644 x-pack/plugin/inference/qa/multi-node/build.gradle diff --git a/x-pack/plugin/inference/qa/multi-node/build.gradle b/x-pack/plugin/inference/qa/multi-node/build.gradle deleted file mode 100644 index e35706c4d8236..0000000000000 --- a/x-pack/plugin/inference/qa/multi-node/build.gradle +++ /dev/null @@ -1,14 +0,0 @@ -apply plugin: 'elasticsearch.internal-java-rest-test' - -dependencies { - javaRestTestImplementation project(xpackModule('core')) - javaRestTestImplementation testArtifact(project(xpackModule('core'))) - javaRestTestImplementation project(xpackModule('inference')) - javaRestTestImplementation testArtifact(project(xpackModule('inference'))) - javaRestTestImplementation project(':modules:reindex') - javaRestTestImplementation project(':x-pack:plugin:inference:qa:test-service-plugin') -} - -tasks.named('javaRestTest') { - usesDefaultDistribution("to be triaged") -} From f69c9c7c16d994ba0b8c788b03cec56f473751c6 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Wed, 8 Oct 2025 14:30:01 -0400 Subject: [PATCH 05/15] Updated shouldExcludeInferenceFieldsFromSource to handle when source is disabled --- .../index/get/ShardGetService.java | 27 ++++++++++++++++--- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/get/ShardGetService.java b/server/src/main/java/org/elasticsearch/index/get/ShardGetService.java index 94630d58a0ecb..c3fe554cb9a15 100644 --- a/server/src/main/java/org/elasticsearch/index/get/ShardGetService.java +++ b/server/src/main/java/org/elasticsearch/index/get/ShardGetService.java @@ -21,6 +21,7 @@ import org.elasticsearch.common.metrics.CounterMetric; import org.elasticsearch.common.metrics.MeanMetric; import org.elasticsearch.common.regex.Regex; +import org.elasticsearch.common.settings.Settings; import org.elasticsearch.core.Nullable; import org.elasticsearch.core.Tuple; import org.elasticsearch.index.IndexSettings; @@ -425,16 +426,34 @@ private static Boolean shouldExcludeVectorsFromSourceExplicit(FetchSourceContext } public static boolean shouldExcludeInferenceFieldsFromSource(IndexSettings indexSettings, FetchSourceContext fetchSourceContext) { - var explicit = shouldExcludeInferenceFieldsFromSourceExplicit(fetchSourceContext); + if (fetchSourceContext != null && fetchSourceContext.fetchSource() == false) { + // Source is disabled + return true; + } + + Boolean filtered = null; var filter = fetchSourceContext != null ? fetchSourceContext.filter() : null; if (filter != null) { if (filter.isPathFiltered(InferenceMetadataFieldsMapper.NAME, true)) { - return true; + filtered = true; } else if (filter.isExplicitlyIncluded(InferenceMetadataFieldsMapper.NAME)) { - return false; + filtered = false; } } - return explicit != null ? explicit : INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING.get(indexSettings.getSettings()); + if (filtered != null) { + return filtered; + } + + Boolean excludeInferenceFieldsExplicit = shouldExcludeInferenceFieldsFromSourceExplicit(fetchSourceContext); + if (excludeInferenceFieldsExplicit != null) { + return excludeInferenceFieldsExplicit; + } + + // We always default to excluding the inference metadata field. We only use the index setting when it is explicitly set. + Settings settings = indexSettings.getSettings(); + return INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING.exists(settings) + ? INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING.get(settings) + : true; } private static Boolean shouldExcludeInferenceFieldsFromSourceExplicit(FetchSourceContext fetchSourceContext) { From 93b521050eb1535f66e0374521b20ec10445a6c9 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Wed, 8 Oct 2025 14:39:11 -0400 Subject: [PATCH 06/15] Updated test --- .../integration/SemanticTextEmbeddingsIT.java | 28 +++++++++++++++++-- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/SemanticTextEmbeddingsIT.java b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/SemanticTextEmbeddingsIT.java index 9f546d1fe80d0..dae2614094970 100644 --- a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/SemanticTextEmbeddingsIT.java +++ b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/SemanticTextEmbeddingsIT.java @@ -13,6 +13,7 @@ import org.elasticsearch.action.support.IndicesOptions; import org.elasticsearch.cluster.metadata.IndexMetadata; import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.document.DocumentField; import org.elasticsearch.common.io.stream.NamedWriteableRegistry; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.IndexVersion; @@ -51,6 +52,8 @@ import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertResponse; import static org.hamcrest.CoreMatchers.equalTo; +import static org.hamcrest.CoreMatchers.is; +import static org.hamcrest.CoreMatchers.nullValue; @ESIntegTestCase.ClusterScope(minNumDataNodes = 3, maxNumDataNodes = 5) public class SemanticTextEmbeddingsIT extends ESIntegTestCase { @@ -110,7 +113,6 @@ public void cleanUp() { } } - // TODO: Complete test once fix is implemented public void testSourceDisabledAndIncludeVectors() throws Exception { // Get a random index version after when we default to the new semantic text format, and before we exclude vectors in source by // default @@ -142,7 +144,27 @@ public void testSourceDisabledAndIncludeVectors() throws Exception { request -> request.source().fetchSource(false).fetchField(sparseEmbeddingField), response -> { for (SearchHit hit : response.getHits()) { - hit.getDocumentFields(); + assertThat(hit.getSourceAsMap(), nullValue()); + + Map documentFields = hit.getDocumentFields(); + assertThat(documentFields.size(), is(1)); + assertThat(documentFields.containsKey(sparseEmbeddingField), is(true)); + } + } + ); + + QueryBuilder textEmbeddingFieldQuery = new SemanticQueryBuilder(textEmbeddingField, randomAlphaOfLength(10)); + assertSearchResponse( + textEmbeddingFieldQuery, + 10, + request -> request.source().fetchSource(false).fetchField(textEmbeddingField), + response -> { + for (SearchHit hit : response.getHits()) { + assertThat(hit.getSourceAsMap(), nullValue()); + + Map documentFields = hit.getDocumentFields(); + assertThat(documentFields.size(), is(1)); + assertThat(documentFields.containsKey(textEmbeddingField), is(true)); } } ); @@ -191,7 +213,7 @@ private void indexDocuments(String field, int count) { for (int i = 0; i < count; i++) { Map source = Map.of(field, randomAlphaOfLength(10)); DocWriteResponse response = client().prepareIndex(indexName).setSource(source).get(TEST_REQUEST_TIMEOUT); - assertThat(response.getResult(), equalTo(DocWriteResponse.Result.CREATED)); + assertThat(response.getResult(), is(DocWriteResponse.Result.CREATED)); } client().admin().indices().prepareRefresh(indexName).get(); From c56896da176f0e728df35191eb65541d110312cf Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Thu, 9 Oct 2025 12:14:12 -0400 Subject: [PATCH 07/15] Updated ShardGetServiceTests --- .../index/shard/ShardGetServiceTests.java | 119 ++++++++++++++++++ 1 file changed, 119 insertions(+) diff --git a/server/src/test/java/org/elasticsearch/index/shard/ShardGetServiceTests.java b/server/src/test/java/org/elasticsearch/index/shard/ShardGetServiceTests.java index a10a23db0b838..c38cde538495f 100644 --- a/server/src/test/java/org/elasticsearch/index/shard/ShardGetServiceTests.java +++ b/server/src/test/java/org/elasticsearch/index/shard/ShardGetServiceTests.java @@ -16,6 +16,7 @@ import org.elasticsearch.core.Strings; import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.IndexVersion; +import org.elasticsearch.index.IndexVersions; import org.elasticsearch.index.VersionType; import org.elasticsearch.index.engine.Engine; import org.elasticsearch.index.engine.EngineTestCase; @@ -24,9 +25,13 @@ import org.elasticsearch.index.engine.TranslogOperationAsserter; import org.elasticsearch.index.engine.VersionConflictEngineException; import org.elasticsearch.index.get.GetResult; +import org.elasticsearch.index.get.ShardGetService; +import org.elasticsearch.index.mapper.InferenceMetadataFieldsMapper; import org.elasticsearch.index.mapper.RoutingFieldMapper; import org.elasticsearch.index.translog.Translog; import org.elasticsearch.search.fetch.subphase.FetchSourceContext; +import org.elasticsearch.search.lookup.SourceFilter; +import org.elasticsearch.test.index.IndexVersionUtils; import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xcontent.XContentFactory; import org.elasticsearch.xcontent.XContentParser; @@ -36,6 +41,7 @@ import java.util.Arrays; import java.util.function.LongSupplier; +import static org.elasticsearch.index.IndexSettings.INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING; import static org.elasticsearch.index.seqno.SequenceNumbers.UNASSIGNED_PRIMARY_TERM; import static org.elasticsearch.index.seqno.SequenceNumbers.UNASSIGNED_SEQ_NO; import static org.hamcrest.Matchers.equalTo; @@ -411,6 +417,29 @@ public void testGetFromTranslog() throws IOException { closeShards(primary); } + public void testShouldExcludeInferenceFieldsFromSource() { + for (int i = 0; i < 100; i++) { + ExcludeInferenceFieldsTestScenario scenario = new ExcludeInferenceFieldsTestScenario(IndexVersion.current()); + assertThat( + ShardGetService.shouldExcludeInferenceFieldsFromSource(scenario.indexSettings, scenario.fetchSourceContext), + equalTo(scenario.shouldExcludeInferenceFields()) + ); + } + + for (int i = 0; i < 200; i++) { + IndexVersion indexVersion = IndexVersionUtils.randomVersionBetween( + random(), + IndexVersions.MINIMUM_COMPATIBLE, + IndexVersionUtils.getPreviousVersion(IndexVersion.current()) + ); + ExcludeInferenceFieldsTestScenario scenario = new ExcludeInferenceFieldsTestScenario(indexVersion); + assertThat( + ShardGetService.shouldExcludeInferenceFieldsFromSource(scenario.indexSettings, scenario.fetchSourceContext), + equalTo(scenario.shouldExcludeInferenceFields()) + ); + } + } + Translog.Index toIndexOp(String source) throws IOException { XContentParser parser = createParser(XContentType.JSON.xContent(), source); XContentBuilder builder = XContentFactory.jsonBuilder(); @@ -425,4 +454,94 @@ Translog.Index toIndexOp(String source) throws IOException { IndexRequest.UNSET_AUTO_GENERATED_TIMESTAMP ); } + + private static class ExcludeInferenceFieldsTestScenario { + private final IndexSettings indexSettings; + private final FetchSourceContext fetchSourceContext; + + private ExcludeInferenceFieldsTestScenario(IndexVersion indexVersion) { + this.indexSettings = generateRandomIndexSettings(indexVersion); + this.fetchSourceContext = generateRandomFetchSourceContext(); + } + + private boolean shouldExcludeInferenceFields() { + if (fetchSourceContext != null && fetchSourceContext.fetchSource() == false) { + return true; + } + + Boolean filtered = null; + SourceFilter filter = fetchSourceContext != null ? fetchSourceContext.filter() : null; + if (filter != null) { + if (Arrays.asList(filter.getExcludes()).contains(InferenceMetadataFieldsMapper.NAME)) { + filtered = true; + } else if (filter.getIncludes().length > 0) { + filtered = Arrays.asList(filter.getIncludes()).contains(InferenceMetadataFieldsMapper.NAME) == false; + } + } + if (filtered != null) { + return filtered; + } + + Boolean excludeInferenceFieldsExplicit = fetchSourceContext != null ? fetchSourceContext.excludeInferenceFields() : null; + if (excludeInferenceFieldsExplicit != null) { + return excludeInferenceFieldsExplicit; + } + + Settings settings = indexSettings.getSettings(); + return INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING.exists(settings) + ? INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING.get(settings) + : true; + } + + private static IndexSettings generateRandomIndexSettings(IndexVersion indexVersion) { + Settings.Builder settings = Settings.builder() + .put(IndexMetadata.SETTING_VERSION_CREATED, indexVersion) + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0); + if (randomBoolean()) { + boolean excludeSourceVectors = randomBoolean(); + settings.put(IndexSettings.INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING.getKey(), excludeSourceVectors); + } + + return new IndexSettings(IndexMetadata.builder(randomIdentifier()).settings(settings).build(), settings.build()); + } + + private static FetchSourceContext generateRandomFetchSourceContext() { + FetchSourceContext fetchSourceContext = switch (randomIntBetween(0, 4)) { + case 0 -> FetchSourceContext.FETCH_SOURCE; + case 1 -> FetchSourceContext.FETCH_ALL_SOURCE; + case 2 -> FetchSourceContext.FETCH_ALL_SOURCE_EXCLUDE_INFERENCE_FIELDS; + case 3 -> FetchSourceContext.DO_NOT_FETCH_SOURCE; + case 4 -> null; + default -> throw new IllegalStateException("Unhandled randomized case"); + }; + + if (fetchSourceContext != null && fetchSourceContext.fetchSource()) { + String[] includes = null; + String[] excludes = null; + if (randomBoolean()) { + // Randomly include a non-existent field to test explicit inclusion handling + String field = randomBoolean() ? InferenceMetadataFieldsMapper.NAME : randomIdentifier(); + includes = new String[] { field }; + } + if (randomBoolean()) { + // Randomly exclude a non-existent field to test implicit inclusion handling + String field = randomBoolean() ? InferenceMetadataFieldsMapper.NAME : randomIdentifier(); + excludes = new String[] { field }; + } + + if (includes != null || excludes != null) { + fetchSourceContext = FetchSourceContext.of( + fetchSourceContext.fetchSource(), + fetchSourceContext.excludeVectors(), + fetchSourceContext.excludeInferenceFields(), + includes, + excludes + ); + } + } + + return fetchSourceContext; + } + } } From 0ef2be556857cb6f0bf71a53480d91e24b29fa2f Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Thu, 9 Oct 2025 13:55:54 -0400 Subject: [PATCH 08/15] Added randomized integration tests --- .../integration/SemanticTextEmbeddingsIT.java | 231 +++++++++++++----- 1 file changed, 174 insertions(+), 57 deletions(-) diff --git a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/SemanticTextEmbeddingsIT.java b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/SemanticTextEmbeddingsIT.java index dae2614094970..aad0f06a293f7 100644 --- a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/SemanticTextEmbeddingsIT.java +++ b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/SemanticTextEmbeddingsIT.java @@ -16,7 +16,10 @@ import org.elasticsearch.common.document.DocumentField; import org.elasticsearch.common.io.stream.NamedWriteableRegistry; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.core.Nullable; import org.elasticsearch.index.IndexVersion; +import org.elasticsearch.index.IndexVersions; +import org.elasticsearch.index.mapper.InferenceMetadataFieldsMapper; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.inference.TaskType; import org.elasticsearch.license.LicenseSettings; @@ -24,6 +27,8 @@ import org.elasticsearch.reindex.ReindexPlugin; import org.elasticsearch.search.SearchHit; import org.elasticsearch.search.builder.SearchSourceBuilder; +import org.elasticsearch.search.fetch.subphase.FetchSourceContext; +import org.elasticsearch.search.lookup.SourceFilter; import org.elasticsearch.test.ESIntegTestCase; import org.elasticsearch.test.index.IndexVersionUtils; import org.elasticsearch.xcontent.XContentBuilder; @@ -41,18 +46,19 @@ import org.junit.After; import java.io.IOException; +import java.util.Arrays; import java.util.Collection; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.function.Consumer; -import static org.elasticsearch.index.IndexVersions.EXCLUDE_SOURCE_VECTORS_DEFAULT; -import static org.elasticsearch.index.mapper.InferenceMetadataFieldsMapper.USE_NEW_SEMANTIC_TEXT_FORMAT_BY_DEFAULT; +import static org.elasticsearch.index.IndexSettings.INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertResponse; import static org.hamcrest.CoreMatchers.equalTo; import static org.hamcrest.CoreMatchers.is; +import static org.hamcrest.CoreMatchers.notNullValue; import static org.hamcrest.CoreMatchers.nullValue; @ESIntegTestCase.ClusterScope(minNumDataNodes = 3, maxNumDataNodes = 5) @@ -89,18 +95,7 @@ protected boolean forbidPrivateIndexSettings() { @After public void cleanUp() { - assertAcked( - safeGet( - client().admin() - .indices() - .prepareDelete(indexName) - .setIndicesOptions( - IndicesOptions.builder().concreteTargetOptions(new IndicesOptions.ConcreteTargetOptions(true)).build() - ) - .execute() - ) - ); - + deleteIndex(indexName); for (var entry : inferenceIds.entrySet()) { assertAcked( safeGet( @@ -113,15 +108,20 @@ public void cleanUp() { } } - public void testSourceDisabledAndIncludeVectors() throws Exception { - // Get a random index version after when we default to the new semantic text format, and before we exclude vectors in source by - // default - final IndexVersion indexVersion = IndexVersionUtils.randomVersionBetween( - random(), - USE_NEW_SEMANTIC_TEXT_FORMAT_BY_DEFAULT, - IndexVersionUtils.getPreviousVersion(EXCLUDE_SOURCE_VECTORS_DEFAULT) + public void testExcludeInferenceFieldsFromSource() throws Exception { + excludeInferenceFieldsFromSourceTestCase(IndexVersion.current(), IndexVersion.current(), 10); + } + + public void testExcludeInferenceFieldsFromSourceOldIndexVersions() throws Exception { + excludeInferenceFieldsFromSourceTestCase( + IndexVersions.SEMANTIC_TEXT_FIELD_TYPE, + IndexVersionUtils.getPreviousVersion(IndexVersion.current()), + 40 ); + } + private void excludeInferenceFieldsFromSourceTestCase(IndexVersion minIndexVersion, IndexVersion maxIndexVersion, int iterations) + throws Exception { final String sparseEmbeddingInferenceId = randomIdentifier(); final String textEmbeddingInferenceId = randomIdentifier(); createInferenceEndpoint(TaskType.SPARSE_EMBEDDING, sparseEmbeddingInferenceId, SPARSE_EMBEDDING_SERVICE_SETTINGS); @@ -129,45 +129,43 @@ public void testSourceDisabledAndIncludeVectors() throws Exception { final String sparseEmbeddingField = randomIdentifier(); final String textEmbeddingField = randomIdentifier(); - XContentBuilder mappings = generateMapping( - Map.of(sparseEmbeddingField, sparseEmbeddingInferenceId, textEmbeddingField, textEmbeddingInferenceId) - ); - assertAcked(prepareCreate(indexName).setSettings(generateIndexSettings(indexVersion)).setMapping(mappings)); - indexDocuments(sparseEmbeddingField, 10); - indexDocuments(textEmbeddingField, 10); + for (int i = 0; i < iterations; i++) { + final IndexVersion indexVersion = IndexVersionUtils.randomVersionBetween(random(), minIndexVersion, maxIndexVersion); + final Settings indexSettings = generateRandomIndexSettings(indexVersion); + XContentBuilder mappings = generateMapping( + Map.of(sparseEmbeddingField, sparseEmbeddingInferenceId, textEmbeddingField, textEmbeddingInferenceId) + ); + assertAcked(prepareCreate(indexName).setSettings(indexSettings).setMapping(mappings)); - QueryBuilder sparseEmbeddingFieldQuery = new SemanticQueryBuilder(sparseEmbeddingField, randomAlphaOfLength(10)); - assertSearchResponse( - sparseEmbeddingFieldQuery, - 10, - request -> request.source().fetchSource(false).fetchField(sparseEmbeddingField), - response -> { - for (SearchHit hit : response.getHits()) { - assertThat(hit.getSourceAsMap(), nullValue()); + final int docCount = randomIntBetween(10, 50); + indexDocuments(sparseEmbeddingField, docCount); + indexDocuments(textEmbeddingField, docCount); + QueryBuilder sparseEmbeddingFieldQuery = new SemanticQueryBuilder(sparseEmbeddingField, randomAlphaOfLength(10)); + assertSearchResponse(sparseEmbeddingFieldQuery, indexSettings, docCount, request -> { + request.source().fetchSource(generateRandomFetchSourceContext()).fetchField(sparseEmbeddingField); + }, response -> { + for (SearchHit hit : response.getHits()) { Map documentFields = hit.getDocumentFields(); assertThat(documentFields.size(), is(1)); assertThat(documentFields.containsKey(sparseEmbeddingField), is(true)); } - } - ); + }); - QueryBuilder textEmbeddingFieldQuery = new SemanticQueryBuilder(textEmbeddingField, randomAlphaOfLength(10)); - assertSearchResponse( - textEmbeddingFieldQuery, - 10, - request -> request.source().fetchSource(false).fetchField(textEmbeddingField), - response -> { + QueryBuilder textEmbeddingFieldQuery = new SemanticQueryBuilder(textEmbeddingField, randomAlphaOfLength(10)); + assertSearchResponse(textEmbeddingFieldQuery, indexSettings, docCount, request -> { + request.source().fetchSource(generateRandomFetchSourceContext()).fetchField(textEmbeddingField); + }, response -> { for (SearchHit hit : response.getHits()) { - assertThat(hit.getSourceAsMap(), nullValue()); - Map documentFields = hit.getDocumentFields(); assertThat(documentFields.size(), is(1)); assertThat(documentFields.containsKey(textEmbeddingField), is(true)); } - } - ); + }); + + deleteIndex(indexName); + } } private void createInferenceEndpoint(TaskType taskType, String inferenceId, Map serviceSettings) throws IOException { @@ -200,13 +198,18 @@ private void createInferenceEndpoint(TaskType taskType, String inferenceId, Map< inferenceIds.put(inferenceId, taskType); } - private Settings generateIndexSettings(IndexVersion indexVersion) { + private Settings generateRandomIndexSettings(IndexVersion indexVersion) { int numDataNodes = internalCluster().numDataNodes(); - return Settings.builder() + Settings.Builder settings = Settings.builder() .put(IndexMetadata.SETTING_VERSION_CREATED, indexVersion) .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, numDataNodes) - .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) - .build(); + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0); + + if (randomBoolean()) { + settings.put(INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING.getKey(), randomBoolean()); + } + + return settings.build(); } private void indexDocuments(String field, int count) { @@ -221,22 +224,116 @@ private void indexDocuments(String field, int count) { private void assertSearchResponse( QueryBuilder queryBuilder, - long expectedHits, - Consumer searchRequestModifier, - Consumer searchResponseValidator + Settings indexSettings, + int expectedHits, + @Nullable Consumer searchRequestModifier, + @Nullable Consumer searchResponseValidator ) throws Exception { - SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder().query(queryBuilder); + SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder().query(queryBuilder).size(expectedHits); SearchRequest searchRequest = new SearchRequest(new String[] { indexName }, searchSourceBuilder); if (searchRequestModifier != null) { searchRequestModifier.accept(searchRequest); } + ExpectedSource expectedSource = getExpectedSource(indexSettings, searchRequest.source().fetchSource()); assertResponse(client().search(searchRequest), response -> { - assertThat(response.getHits().getTotalHits().value(), equalTo(expectedHits)); - searchResponseValidator.accept(response); + assertThat(response.getSuccessfulShards(), equalTo(response.getTotalShards())); + assertThat(response.getHits().getTotalHits().value(), equalTo((long) expectedHits)); + + for (SearchHit hit : response.getHits()) { + switch (expectedSource) { + case NONE -> assertThat(hit.getSourceAsMap(), nullValue()); + case INFERENCE_FIELDS_EXCLUDED -> { + Map sourceAsMap = hit.getSourceAsMap(); + assertThat(sourceAsMap, notNullValue()); + assertThat(sourceAsMap.containsKey(InferenceMetadataFieldsMapper.NAME), is(false)); + } + case INFERENCE_FIELDS_INCLUDED -> { + Map sourceAsMap = hit.getSourceAsMap(); + assertThat(sourceAsMap, notNullValue()); + assertThat(sourceAsMap.containsKey(InferenceMetadataFieldsMapper.NAME), is(true)); + } + } + } + + if (searchResponseValidator != null) { + searchResponseValidator.accept(response); + } }); } + private static ExpectedSource getExpectedSource(Settings indexSettings, FetchSourceContext fetchSourceContext) { + if (fetchSourceContext != null && fetchSourceContext.fetchSource() == false) { + return ExpectedSource.NONE; + } else if (InferenceMetadataFieldsMapper.isEnabled(indexSettings) == false) { + return ExpectedSource.INFERENCE_FIELDS_EXCLUDED; + } + + if (fetchSourceContext != null) { + SourceFilter filter = fetchSourceContext.filter(); + if (filter != null) { + if (Arrays.asList(filter.getExcludes()).contains(InferenceMetadataFieldsMapper.NAME)) { + return ExpectedSource.INFERENCE_FIELDS_EXCLUDED; + } else if (filter.getIncludes().length > 0) { + return Arrays.asList(filter.getIncludes()).contains(InferenceMetadataFieldsMapper.NAME) + ? ExpectedSource.INFERENCE_FIELDS_INCLUDED + : ExpectedSource.INFERENCE_FIELDS_EXCLUDED; + } + } + + Boolean excludeInferenceFieldsExplicit = fetchSourceContext.excludeInferenceFields(); + if (excludeInferenceFieldsExplicit != null) { + return excludeInferenceFieldsExplicit ? ExpectedSource.INFERENCE_FIELDS_EXCLUDED : ExpectedSource.INFERENCE_FIELDS_INCLUDED; + } + } + + if (INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING.exists(indexSettings)) { + return INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING.get(indexSettings) + ? ExpectedSource.INFERENCE_FIELDS_EXCLUDED + : ExpectedSource.INFERENCE_FIELDS_INCLUDED; + } + + return ExpectedSource.INFERENCE_FIELDS_EXCLUDED; + } + + private static FetchSourceContext generateRandomFetchSourceContext() { + FetchSourceContext fetchSourceContext = switch (randomIntBetween(0, 4)) { + case 0 -> FetchSourceContext.FETCH_SOURCE; + case 1 -> FetchSourceContext.FETCH_ALL_SOURCE; + case 2 -> FetchSourceContext.FETCH_ALL_SOURCE_EXCLUDE_INFERENCE_FIELDS; + case 3 -> FetchSourceContext.DO_NOT_FETCH_SOURCE; + case 4 -> null; + default -> throw new IllegalStateException("Unhandled randomized case"); + }; + + if (fetchSourceContext != null && fetchSourceContext.fetchSource()) { + String[] includes = null; + String[] excludes = null; + if (randomBoolean()) { + // Randomly include a non-existent field to test explicit inclusion handling + String field = randomBoolean() ? InferenceMetadataFieldsMapper.NAME : randomIdentifier(); + includes = new String[] { field }; + } + if (randomBoolean()) { + // Randomly exclude a non-existent field to test implicit inclusion handling + String field = randomBoolean() ? InferenceMetadataFieldsMapper.NAME : randomIdentifier(); + excludes = new String[] { field }; + } + + if (includes != null || excludes != null) { + fetchSourceContext = FetchSourceContext.of( + fetchSourceContext.fetchSource(), + fetchSourceContext.excludeVectors(), + fetchSourceContext.excludeInferenceFields(), + includes, + excludes + ); + } + } + + return fetchSourceContext; + } + private static XContentBuilder generateMapping(Map semanticTextFields) throws IOException { XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject("properties"); for (var entry : semanticTextFields.entrySet()) { @@ -250,6 +347,26 @@ private static XContentBuilder generateMapping(Map semanticTextF return mapping; } + private static void deleteIndex(String indexName) { + assertAcked( + safeGet( + client().admin() + .indices() + .prepareDelete(indexName) + .setIndicesOptions( + IndicesOptions.builder().concreteTargetOptions(new IndicesOptions.ConcreteTargetOptions(true)).build() + ) + .execute() + ) + ); + } + + private enum ExpectedSource { + NONE, + INFERENCE_FIELDS_EXCLUDED, + INFERENCE_FIELDS_INCLUDED + } + public static class FakeMlPlugin extends Plugin { @Override public List getNamedWriteables() { From 0c7bf0c10c11e18a2fee43e3de2ecf638445a1b9 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Thu, 9 Oct 2025 13:56:59 -0400 Subject: [PATCH 09/15] Renamed integration tests --- ...TextEmbeddingsIT.java => SemanticTextInferenceFieldsIT.java} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/{SemanticTextEmbeddingsIT.java => SemanticTextInferenceFieldsIT.java} (99%) diff --git a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/SemanticTextEmbeddingsIT.java b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/SemanticTextInferenceFieldsIT.java similarity index 99% rename from x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/SemanticTextEmbeddingsIT.java rename to x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/SemanticTextInferenceFieldsIT.java index aad0f06a293f7..ec45172f4ead9 100644 --- a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/SemanticTextEmbeddingsIT.java +++ b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/SemanticTextInferenceFieldsIT.java @@ -62,7 +62,7 @@ import static org.hamcrest.CoreMatchers.nullValue; @ESIntegTestCase.ClusterScope(minNumDataNodes = 3, maxNumDataNodes = 5) -public class SemanticTextEmbeddingsIT extends ESIntegTestCase { +public class SemanticTextInferenceFieldsIT extends ESIntegTestCase { private final String indexName = randomIdentifier(); private final Map inferenceIds = new HashMap<>(); From 9aec173acdb61b8b20c6733ccfda744d0d2f07e3 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Thu, 9 Oct 2025 14:07:32 -0400 Subject: [PATCH 10/15] Cleanup --- .../index/get/ShardGetService.java | 34 ++++++++------- .../index/shard/ShardGetServiceTests.java | 41 +++++++++---------- 2 files changed, 35 insertions(+), 40 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/get/ShardGetService.java b/server/src/main/java/org/elasticsearch/index/get/ShardGetService.java index c3fe554cb9a15..ec94c3a28e88b 100644 --- a/server/src/main/java/org/elasticsearch/index/get/ShardGetService.java +++ b/server/src/main/java/org/elasticsearch/index/get/ShardGetService.java @@ -426,27 +426,25 @@ private static Boolean shouldExcludeVectorsFromSourceExplicit(FetchSourceContext } public static boolean shouldExcludeInferenceFieldsFromSource(IndexSettings indexSettings, FetchSourceContext fetchSourceContext) { - if (fetchSourceContext != null && fetchSourceContext.fetchSource() == false) { - // Source is disabled - return true; - } + if (fetchSourceContext != null) { + if (fetchSourceContext.fetchSource() == false) { + // Source is disabled + return true; + } - Boolean filtered = null; - var filter = fetchSourceContext != null ? fetchSourceContext.filter() : null; - if (filter != null) { - if (filter.isPathFiltered(InferenceMetadataFieldsMapper.NAME, true)) { - filtered = true; - } else if (filter.isExplicitlyIncluded(InferenceMetadataFieldsMapper.NAME)) { - filtered = false; + var filter = fetchSourceContext.filter(); + if (filter != null) { + if (filter.isPathFiltered(InferenceMetadataFieldsMapper.NAME, true)) { + return true; + } else if (filter.isExplicitlyIncluded(InferenceMetadataFieldsMapper.NAME)) { + return false; + } } - } - if (filtered != null) { - return filtered; - } - Boolean excludeInferenceFieldsExplicit = shouldExcludeInferenceFieldsFromSourceExplicit(fetchSourceContext); - if (excludeInferenceFieldsExplicit != null) { - return excludeInferenceFieldsExplicit; + Boolean excludeInferenceFieldsExplicit = shouldExcludeInferenceFieldsFromSourceExplicit(fetchSourceContext); + if (excludeInferenceFieldsExplicit != null) { + return excludeInferenceFieldsExplicit; + } } // We always default to excluding the inference metadata field. We only use the index setting when it is explicitly set. diff --git a/server/src/test/java/org/elasticsearch/index/shard/ShardGetServiceTests.java b/server/src/test/java/org/elasticsearch/index/shard/ShardGetServiceTests.java index c38cde538495f..da1945a59782e 100644 --- a/server/src/test/java/org/elasticsearch/index/shard/ShardGetServiceTests.java +++ b/server/src/test/java/org/elasticsearch/index/shard/ShardGetServiceTests.java @@ -41,7 +41,6 @@ import java.util.Arrays; import java.util.function.LongSupplier; -import static org.elasticsearch.index.IndexSettings.INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING; import static org.elasticsearch.index.seqno.SequenceNumbers.UNASSIGNED_PRIMARY_TERM; import static org.elasticsearch.index.seqno.SequenceNumbers.UNASSIGNED_SEQ_NO; import static org.hamcrest.Matchers.equalTo; @@ -465,31 +464,29 @@ private ExcludeInferenceFieldsTestScenario(IndexVersion indexVersion) { } private boolean shouldExcludeInferenceFields() { - if (fetchSourceContext != null && fetchSourceContext.fetchSource() == false) { - return true; - } + if (fetchSourceContext != null) { + if (fetchSourceContext.fetchSource() == false) { + return true; + } - Boolean filtered = null; - SourceFilter filter = fetchSourceContext != null ? fetchSourceContext.filter() : null; - if (filter != null) { - if (Arrays.asList(filter.getExcludes()).contains(InferenceMetadataFieldsMapper.NAME)) { - filtered = true; - } else if (filter.getIncludes().length > 0) { - filtered = Arrays.asList(filter.getIncludes()).contains(InferenceMetadataFieldsMapper.NAME) == false; + SourceFilter filter = fetchSourceContext.filter(); + if (filter != null) { + if (Arrays.asList(filter.getExcludes()).contains(InferenceMetadataFieldsMapper.NAME)) { + return true; + } else if (filter.getIncludes().length > 0) { + return Arrays.asList(filter.getIncludes()).contains(InferenceMetadataFieldsMapper.NAME) == false; + } } - } - if (filtered != null) { - return filtered; - } - Boolean excludeInferenceFieldsExplicit = fetchSourceContext != null ? fetchSourceContext.excludeInferenceFields() : null; - if (excludeInferenceFieldsExplicit != null) { - return excludeInferenceFieldsExplicit; + Boolean excludeInferenceFieldsExplicit = fetchSourceContext.excludeInferenceFields(); + if (excludeInferenceFieldsExplicit != null) { + return excludeInferenceFieldsExplicit; + } } Settings settings = indexSettings.getSettings(); - return INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING.exists(settings) - ? INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING.get(settings) + return IndexSettings.INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING.exists(settings) + ? IndexSettings.INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING.get(settings) : true; } @@ -498,9 +495,9 @@ private static IndexSettings generateRandomIndexSettings(IndexVersion indexVersi .put(IndexMetadata.SETTING_VERSION_CREATED, indexVersion) .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0); + if (randomBoolean()) { - boolean excludeSourceVectors = randomBoolean(); - settings.put(IndexSettings.INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING.getKey(), excludeSourceVectors); + settings.put(IndexSettings.INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING.getKey(), randomBoolean()); } return new IndexSettings(IndexMetadata.builder(randomIdentifier()).settings(settings).build(), settings.build()); From cc093a8b7b254c21e86549b4f27b5f3101c1ba45 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Thu, 9 Oct 2025 14:38:35 -0400 Subject: [PATCH 11/15] Update docs/changelog/136312.yaml --- docs/changelog/136312.yaml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 docs/changelog/136312.yaml diff --git a/docs/changelog/136312.yaml b/docs/changelog/136312.yaml new file mode 100644 index 0000000000000..d9b8541ade038 --- /dev/null +++ b/docs/changelog/136312.yaml @@ -0,0 +1,5 @@ +pr: 136312 +summary: Fix inference fields handling on old indices +area: Relevance +type: bug +issues: [] From 0cba62366c7f748d7c821656df4bcf304ff081b9 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Thu, 9 Oct 2025 14:40:29 -0400 Subject: [PATCH 12/15] Update changelog --- docs/changelog/136312.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/changelog/136312.yaml b/docs/changelog/136312.yaml index d9b8541ade038..e3b9d0bdcd3ba 100644 --- a/docs/changelog/136312.yaml +++ b/docs/changelog/136312.yaml @@ -1,5 +1,5 @@ pr: 136312 -summary: Fix inference fields handling on old indices -area: Relevance +summary: Fix _inference_fields handling on old indices +area: Vector Search type: bug -issues: [] +issues: [136130] From 854dfe41c99d2c903bc4db9177b6f3597781878c Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Thu, 9 Oct 2025 15:31:20 -0400 Subject: [PATCH 13/15] Remove dependency on index.mapping.exclude_source_vectors index setting --- .../index/get/ShardGetService.java | 13 ++---- .../search/fetch/FetchPhase.java | 3 +- .../index/shard/ShardGetServiceTests.java | 41 ++----------------- .../SemanticTextInferenceFieldsIT.java | 22 +++------- 4 files changed, 14 insertions(+), 65 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/get/ShardGetService.java b/server/src/main/java/org/elasticsearch/index/get/ShardGetService.java index ec94c3a28e88b..f6cf928cce412 100644 --- a/server/src/main/java/org/elasticsearch/index/get/ShardGetService.java +++ b/server/src/main/java/org/elasticsearch/index/get/ShardGetService.java @@ -21,7 +21,6 @@ import org.elasticsearch.common.metrics.CounterMetric; import org.elasticsearch.common.metrics.MeanMetric; import org.elasticsearch.common.regex.Regex; -import org.elasticsearch.common.settings.Settings; import org.elasticsearch.core.Nullable; import org.elasticsearch.core.Tuple; import org.elasticsearch.index.IndexSettings; @@ -312,8 +311,7 @@ private GetResult innerGetFetch( fetchSourceContext = res.v1(); } - if (mappingLookup.inferenceFields().isEmpty() == false - && shouldExcludeInferenceFieldsFromSource(indexSettings, fetchSourceContext) == false) { + if (mappingLookup.inferenceFields().isEmpty() == false && shouldExcludeInferenceFieldsFromSource(fetchSourceContext) == false) { storedFieldSet.add(InferenceMetadataFieldsMapper.NAME); } @@ -425,7 +423,7 @@ private static Boolean shouldExcludeVectorsFromSourceExplicit(FetchSourceContext return fetchSourceContext != null ? fetchSourceContext.excludeVectors() : null; } - public static boolean shouldExcludeInferenceFieldsFromSource(IndexSettings indexSettings, FetchSourceContext fetchSourceContext) { + public static boolean shouldExcludeInferenceFieldsFromSource(FetchSourceContext fetchSourceContext) { if (fetchSourceContext != null) { if (fetchSourceContext.fetchSource() == false) { // Source is disabled @@ -447,11 +445,8 @@ public static boolean shouldExcludeInferenceFieldsFromSource(IndexSettings index } } - // We always default to excluding the inference metadata field. We only use the index setting when it is explicitly set. - Settings settings = indexSettings.getSettings(); - return INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING.exists(settings) - ? INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING.get(settings) - : true; + // We always default to excluding the inference metadata field, unless the fetch source context says otherwise + return true; } private static Boolean shouldExcludeInferenceFieldsFromSourceExplicit(FetchSourceContext fetchSourceContext) { diff --git a/server/src/main/java/org/elasticsearch/search/fetch/FetchPhase.java b/server/src/main/java/org/elasticsearch/search/fetch/FetchPhase.java index 14c392b675a65..aa27e7d2f0c82 100644 --- a/server/src/main/java/org/elasticsearch/search/fetch/FetchPhase.java +++ b/server/src/main/java/org/elasticsearch/search/fetch/FetchPhase.java @@ -134,8 +134,7 @@ private SearchHits buildSearchHits(SearchContext context, int[] docIdsToLoad, Pr context.fetchSourceContext(res.v1()); } - if (lookup.inferenceFields().isEmpty() == false - && shouldExcludeInferenceFieldsFromSource(context.indexShard().indexSettings(), context.fetchSourceContext()) == false) { + if (lookup.inferenceFields().isEmpty() == false && shouldExcludeInferenceFieldsFromSource(context.fetchSourceContext()) == false) { // Rehydrate the inference fields into the {@code _source} because they were explicitly requested. var oldFetchFieldsContext = context.fetchFieldsContext(); var newFetchFieldsContext = new FetchFieldsContext(new ArrayList<>()); diff --git a/server/src/test/java/org/elasticsearch/index/shard/ShardGetServiceTests.java b/server/src/test/java/org/elasticsearch/index/shard/ShardGetServiceTests.java index da1945a59782e..3b059fd2d906b 100644 --- a/server/src/test/java/org/elasticsearch/index/shard/ShardGetServiceTests.java +++ b/server/src/test/java/org/elasticsearch/index/shard/ShardGetServiceTests.java @@ -16,7 +16,6 @@ import org.elasticsearch.core.Strings; import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.IndexVersion; -import org.elasticsearch.index.IndexVersions; import org.elasticsearch.index.VersionType; import org.elasticsearch.index.engine.Engine; import org.elasticsearch.index.engine.EngineTestCase; @@ -31,7 +30,6 @@ import org.elasticsearch.index.translog.Translog; import org.elasticsearch.search.fetch.subphase.FetchSourceContext; import org.elasticsearch.search.lookup.SourceFilter; -import org.elasticsearch.test.index.IndexVersionUtils; import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xcontent.XContentFactory; import org.elasticsearch.xcontent.XContentParser; @@ -418,22 +416,9 @@ public void testGetFromTranslog() throws IOException { public void testShouldExcludeInferenceFieldsFromSource() { for (int i = 0; i < 100; i++) { - ExcludeInferenceFieldsTestScenario scenario = new ExcludeInferenceFieldsTestScenario(IndexVersion.current()); + ExcludeInferenceFieldsTestScenario scenario = new ExcludeInferenceFieldsTestScenario(); assertThat( - ShardGetService.shouldExcludeInferenceFieldsFromSource(scenario.indexSettings, scenario.fetchSourceContext), - equalTo(scenario.shouldExcludeInferenceFields()) - ); - } - - for (int i = 0; i < 200; i++) { - IndexVersion indexVersion = IndexVersionUtils.randomVersionBetween( - random(), - IndexVersions.MINIMUM_COMPATIBLE, - IndexVersionUtils.getPreviousVersion(IndexVersion.current()) - ); - ExcludeInferenceFieldsTestScenario scenario = new ExcludeInferenceFieldsTestScenario(indexVersion); - assertThat( - ShardGetService.shouldExcludeInferenceFieldsFromSource(scenario.indexSettings, scenario.fetchSourceContext), + ShardGetService.shouldExcludeInferenceFieldsFromSource(scenario.fetchSourceContext), equalTo(scenario.shouldExcludeInferenceFields()) ); } @@ -455,11 +440,9 @@ Translog.Index toIndexOp(String source) throws IOException { } private static class ExcludeInferenceFieldsTestScenario { - private final IndexSettings indexSettings; private final FetchSourceContext fetchSourceContext; - private ExcludeInferenceFieldsTestScenario(IndexVersion indexVersion) { - this.indexSettings = generateRandomIndexSettings(indexVersion); + private ExcludeInferenceFieldsTestScenario() { this.fetchSourceContext = generateRandomFetchSourceContext(); } @@ -484,23 +467,7 @@ private boolean shouldExcludeInferenceFields() { } } - Settings settings = indexSettings.getSettings(); - return IndexSettings.INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING.exists(settings) - ? IndexSettings.INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING.get(settings) - : true; - } - - private static IndexSettings generateRandomIndexSettings(IndexVersion indexVersion) { - Settings.Builder settings = Settings.builder() - .put(IndexMetadata.SETTING_VERSION_CREATED, indexVersion) - .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) - .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0); - - if (randomBoolean()) { - settings.put(IndexSettings.INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING.getKey(), randomBoolean()); - } - - return new IndexSettings(IndexMetadata.builder(randomIdentifier()).settings(settings).build(), settings.build()); + return true; } private static FetchSourceContext generateRandomFetchSourceContext() { diff --git a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/SemanticTextInferenceFieldsIT.java b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/SemanticTextInferenceFieldsIT.java index ec45172f4ead9..bb26153965499 100644 --- a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/SemanticTextInferenceFieldsIT.java +++ b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/SemanticTextInferenceFieldsIT.java @@ -53,7 +53,6 @@ import java.util.Map; import java.util.function.Consumer; -import static org.elasticsearch.index.IndexSettings.INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertResponse; import static org.hamcrest.CoreMatchers.equalTo; @@ -132,7 +131,7 @@ private void excludeInferenceFieldsFromSourceTestCase(IndexVersion minIndexVersi for (int i = 0; i < iterations; i++) { final IndexVersion indexVersion = IndexVersionUtils.randomVersionBetween(random(), minIndexVersion, maxIndexVersion); - final Settings indexSettings = generateRandomIndexSettings(indexVersion); + final Settings indexSettings = generateIndexSettings(indexVersion); XContentBuilder mappings = generateMapping( Map.of(sparseEmbeddingField, sparseEmbeddingInferenceId, textEmbeddingField, textEmbeddingInferenceId) ); @@ -198,18 +197,13 @@ private void createInferenceEndpoint(TaskType taskType, String inferenceId, Map< inferenceIds.put(inferenceId, taskType); } - private Settings generateRandomIndexSettings(IndexVersion indexVersion) { + private Settings generateIndexSettings(IndexVersion indexVersion) { int numDataNodes = internalCluster().numDataNodes(); - Settings.Builder settings = Settings.builder() + return Settings.builder() .put(IndexMetadata.SETTING_VERSION_CREATED, indexVersion) .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, numDataNodes) - .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0); - - if (randomBoolean()) { - settings.put(INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING.getKey(), randomBoolean()); - } - - return settings.build(); + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) + .build(); } private void indexDocuments(String field, int count) { @@ -287,12 +281,6 @@ private static ExpectedSource getExpectedSource(Settings indexSettings, FetchSou } } - if (INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING.exists(indexSettings)) { - return INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING.get(indexSettings) - ? ExpectedSource.INFERENCE_FIELDS_EXCLUDED - : ExpectedSource.INFERENCE_FIELDS_INCLUDED; - } - return ExpectedSource.INFERENCE_FIELDS_EXCLUDED; } From 291bc45f9e3b71f50cd416308f1874b8e28bf691 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Thu, 9 Oct 2025 15:33:18 -0400 Subject: [PATCH 14/15] Update docs/changelog/136312.yaml --- docs/changelog/136312.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/changelog/136312.yaml b/docs/changelog/136312.yaml index e3b9d0bdcd3ba..8c2f7b7fa9971 100644 --- a/docs/changelog/136312.yaml +++ b/docs/changelog/136312.yaml @@ -2,4 +2,4 @@ pr: 136312 summary: Fix _inference_fields handling on old indices area: Vector Search type: bug -issues: [136130] +issues: [] From f1084fab9416e7b3e761eff80d9b2214e7be25c5 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Thu, 9 Oct 2025 15:38:08 -0400 Subject: [PATCH 15/15] Update changelog --- docs/changelog/136312.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/changelog/136312.yaml b/docs/changelog/136312.yaml index 8c2f7b7fa9971..e2da763cf13dd 100644 --- a/docs/changelog/136312.yaml +++ b/docs/changelog/136312.yaml @@ -2,4 +2,6 @@ pr: 136312 summary: Fix _inference_fields handling on old indices area: Vector Search type: bug -issues: [] +issues: [ + 136130 +]