From d5607b95194ded0b4069a5015c5efbe5903905df Mon Sep 17 00:00:00 2001 From: Jim Ferenczi Date: Thu, 25 Sep 2025 15:46:06 +0100 Subject: [PATCH] Respect include/exclude patterns when applying `exclude_vectors` This change ensures that the `_source` `include` and `exclude` patterns are applied as exceptions when `exclude_vectors` is enabled. Previously, `exclude_vectors` was enforced independently of any explicitly defined `include` or `exclude` rules. With this update, queries like: ```json { "_source": { "exclude_vectors": true, "includes": ["my_vector_field"] } } ``` will correctly include my_vector_field, overriding the exclude_vectors parameter. --- .../240_source_synthetic_dense_vectors.yml | 1 - .../250_source_synthetic_sparse_vectors.yml | 57 ++++++++++++++++++- .../index/get/ShardGetService.java | 6 ++ .../search/lookup/SourceFilter.java | 24 +++++++- .../search/lookup/SourceFilterTests.java | 50 ++++++++++++++++ .../rank_vectors_synthetic_vectors.yml | 1 - 6 files changed, 134 insertions(+), 5 deletions(-) diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/240_source_synthetic_dense_vectors.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/240_source_synthetic_dense_vectors.yml index 68f8c868b4e7e..a64b7f4ca3723 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/240_source_synthetic_dense_vectors.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/240_source_synthetic_dense_vectors.yml @@ -153,7 +153,6 @@ setup: index: test body: _source: - exclude_vectors: false includes: nested.vector sort: ["name"] diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/250_source_synthetic_sparse_vectors.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/250_source_synthetic_sparse_vectors.yml index 8397b48866204..562a69bdfa16f 100644 --- 
a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/250_source_synthetic_sparse_vectors.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/250_source_synthetic_sparse_vectors.yml @@ -165,7 +165,6 @@ setup: index: test body: _source: - exclude_vectors: false includes: nested.emb sort: ["name"] @@ -173,7 +172,42 @@ setup: - length: { hits.hits.0._source: 0} - match: { hits.hits.1._id: "2"} + - length: { hits.hits.1._source: 1 } + - length: { hits.hits.1._source.nested: 3 } + - exists: hits.hits.1._source.nested.0.emb + - not_exists: hits.hits.1._source.nested.0.paragraph_id + - exists: hits.hits.1._source.nested.1.emb + - not_exists: hits.hits.1._source.nested.1.paragraph_id + - exists: hits.hits.1._source.nested.2.emb + - not_exists: hits.hits.1._source.nested.2.paragraph_id + + - match: { hits.hits.2._id: "3" } + - length: { hits.hits.2._source: 0} + + - match: { hits.hits.3._id: "4" } - length: { hits.hits.3._source: 1 } + - length: { hits.hits.3._source.nested: 2 } + - exists: hits.hits.3._source.nested.0.emb + - length: { hits.hits.3._source.nested.0.emb: 3 } + - not_exists: hits.hits.3._source.nested.0.paragraph_id + - exists: hits.hits.3._source.nested.1.emb + - length: { hits.hits.3._source.nested.1.emb: 1 } + - not_exists: hits.hits.3._source.nested.1.paragraph_id + + - do: + search: + index: test + body: + _source: + exclude_vectors: true + includes: nested.emb + sort: ["name"] + + - match: { hits.hits.0._id: "1"} + - length: { hits.hits.0._source: 0} + + - match: { hits.hits.1._id: "2"} + - length: { hits.hits.1._source: 1 } - length: { hits.hits.1._source.nested: 3 } - exists: hits.hits.1._source.nested.0.emb - not_exists: hits.hits.1._source.nested.0.paragraph_id @@ -195,6 +229,27 @@ setup: - length: { hits.hits.3._source.nested.1.emb: 1 } - not_exists: hits.hits.3._source.nested.1.paragraph_id + - do: + search: + index: test + body: + _source: + exclude_vectors: true + includes: nested.emb + 
excludes: nested* + sort: ["name"] + + - match: { hits.hits.0._id: "1"} + - length: { hits.hits.0._source: 0} + + - match: { hits.hits.1._id: "2"} + - length: { hits.hits.1._source: 0 } + - match: { hits.hits.2._id: "3" } + - length: { hits.hits.2._source: 0} + + - match: { hits.hits.3._id: "4" } + - length: { hits.hits.3._source: 0 } + - do: headers: # Force JSON content type so that we use a parser that interprets the embeddings as doubles diff --git a/server/src/main/java/org/elasticsearch/index/get/ShardGetService.java b/server/src/main/java/org/elasticsearch/index/get/ShardGetService.java index 88e199ebac359..fb9763831212d 100644 --- a/server/src/main/java/org/elasticsearch/index/get/ShardGetService.java +++ b/server/src/main/java/org/elasticsearch/index/get/ShardGetService.java @@ -458,8 +458,14 @@ public static Tuple maybeExcludeVectorFields( ) : null; + SourceFilter filter = fetchSourceContext != null ? fetchSourceContext.filter() : null; + List lateExcludes = new ArrayList<>(); var excludes = mappingLookup.getFullNameToFieldType().values().stream().filter(MappedFieldType::isVectorEmbedding).filter(f -> { + // Keep the vector fields that are explicitly included and not explicitly excluded + if (filter != null && filter.isExplicitlyIncluded(f.name())) { + return filter.isPathFiltered(f.name(), false); + } // Exclude the field specified by the `fields` option if (fetchFieldsAut != null && fetchFieldsAut.run(f.name())) { lateExcludes.add(f.name()); diff --git a/server/src/main/java/org/elasticsearch/search/lookup/SourceFilter.java b/server/src/main/java/org/elasticsearch/search/lookup/SourceFilter.java index 90034ef447c92..cd28113984177 100644 --- a/server/src/main/java/org/elasticsearch/search/lookup/SourceFilter.java +++ b/server/src/main/java/org/elasticsearch/search/lookup/SourceFilter.java @@ -68,6 +68,26 @@ public String[] getExcludes() { return excludes; } + /** + * Checks if the given path matches at least one explicitly defined include pattern. + *
<p>
+ * If no include patterns are defined, this method always returns {@code false}. + * + * @param fullPath the full path to evaluate + * @return {@code true} if the path matches any explicitly defined include pattern, + * {@code false} otherwise + */ + public boolean isExplicitlyIncluded(String fullPath) { + if (includes.length == 0) { + return false; + } + if (includeAut == null) { + includeAut = XContentMapValues.compileAutomaton(includes, new CharacterRunAutomaton(Automata.makeAnyString())); + } + int state = step(includeAut, fullPath, 0); + return state != -1 && includeAut.isAccept(state); + } + /** * Determines whether the given full path should be filtered out. * @@ -77,7 +97,7 @@ public String[] getExcludes() { */ public boolean isPathFiltered(String fullPath, boolean isObject) { final boolean included; - if (includes != null) { + if (includes.length > 0) { if (includeAut == null) { includeAut = XContentMapValues.compileAutomaton(includes, new CharacterRunAutomaton(Automata.makeAnyString())); } @@ -87,7 +107,7 @@ public boolean isPathFiltered(String fullPath, boolean isObject) { included = true; } - if (excludes != null) { + if (excludes.length > 0) { if (excludeAut == null) { excludeAut = XContentMapValues.compileAutomaton(excludes, new CharacterRunAutomaton(Automata.makeEmpty())); } diff --git a/server/src/test/java/org/elasticsearch/search/lookup/SourceFilterTests.java b/server/src/test/java/org/elasticsearch/search/lookup/SourceFilterTests.java index bddfd53b2b120..1d572eba18075 100644 --- a/server/src/test/java/org/elasticsearch/search/lookup/SourceFilterTests.java +++ b/server/src/test/java/org/elasticsearch/search/lookup/SourceFilterTests.java @@ -156,4 +156,54 @@ public void testIncludeParentAndExcludeChildSubFieldsObjects() { Source filteredBytes = fromBytes.filter(new SourceFilter(new String[] { "myObject" }, new String[] { "myObject.myField" })); assertEquals(filteredBytes.source(), Map.of("myObject", Map.of("other", "otherValue"))); } + + public 
void testIsExplicitlyIncluded() { + var filter = new SourceFilter(null, null); + assertFalse(filter.isExplicitlyIncluded("foo")); + + filter = new SourceFilter(new String[] {}, null); + assertFalse(filter.isExplicitlyIncluded("foo")); + + filter = new SourceFilter(new String[] { "foo", "bar.*" }, null); + assertTrue(filter.isExplicitlyIncluded("foo")); + assertTrue(filter.isExplicitlyIncluded("bar.field")); + assertFalse(filter.isExplicitlyIncluded("baz")); + assertFalse(filter.isExplicitlyIncluded("bar")); + } + + public void testIsPathFilteredWithExcludes() { + var filter = new SourceFilter(null, new String[] { "foo", "bar.field" }); + assertTrue(filter.isPathFiltered("foo", true)); + assertTrue(filter.isPathFiltered("foo", false)); + + assertTrue(filter.isPathFiltered("bar.field", false)); + assertFalse(filter.isPathFiltered("baz", false)); + assertFalse(filter.isPathFiltered("bar", false)); + assertFalse(filter.isPathFiltered("bar", true)); + } + + public void testIsPathFilteredWithIncludes() { + var filter = new SourceFilter(new String[] { "foo", "bar.field" }, null); + assertFalse(filter.isPathFiltered("foo", true)); + assertFalse(filter.isPathFiltered("foo", false)); + + assertFalse(filter.isPathFiltered("bar.field", false)); + assertTrue(filter.isPathFiltered("baz", false)); + assertTrue(filter.isPathFiltered("bar", false)); + assertFalse(filter.isPathFiltered("bar", true)); + } + + public void testIsPathFilteredWithIncludesAndExcludes() { + var filter = new SourceFilter(new String[] { "foo", "bar.*", "nested.field" }, new String[] { "foo", "bar.field" }); + assertTrue(filter.isPathFiltered("foo", true)); + assertTrue(filter.isPathFiltered("foo", false)); + + assertTrue(filter.isPathFiltered("bar.field", false)); + assertTrue(filter.isPathFiltered("baz", false)); + assertTrue(filter.isPathFiltered("bar", false)); + assertFalse(filter.isPathFiltered("bar", true)); + + assertFalse(filter.isPathFiltered("nested.field", false)); + 
assertTrue(filter.isPathFiltered("nested.another", false)); + } } diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/rank_vectors/rank_vectors_synthetic_vectors.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/rank_vectors/rank_vectors_synthetic_vectors.yml index b39325d5147a8..4604677dbdb96 100644 --- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/rank_vectors/rank_vectors_synthetic_vectors.yml +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/rank_vectors/rank_vectors_synthetic_vectors.yml @@ -154,7 +154,6 @@ setup: index: test body: _source: - exclude_vectors: false includes: nested.vector sort: ["name"]