Skip to content

Commit 763bfe3

Browse files
authored
Respect include/exclude patterns when applying exclude_vectors (#135444)
This change ensures that the `_source` `include` and `exclude` patterns are applied as exceptions when `exclude_vectors` is enabled. Previously, `exclude_vectors` was enforced independently of any explicitly defined `include` or `exclude` rules. With this update, queries like:

```json
{ "_source": { "exclude_vectors": true, "includes": ["my_vector_field"] } }
```

will correctly include `my_vector_field`, overriding the `exclude_vectors` parameter.
1 parent 945517b commit 763bfe3

File tree

6 files changed

+134
-5
lines changed

6 files changed

+134
-5
lines changed

rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/240_source_synthetic_dense_vectors.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,6 @@ setup:
153153
index: test
154154
body:
155155
_source:
156-
exclude_vectors: false
157156
includes: nested.vector
158157
sort: ["name"]
159158

rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/250_source_synthetic_sparse_vectors.yml

Lines changed: 56 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -165,15 +165,49 @@ setup:
165165
index: test
166166
body:
167167
_source:
168-
exclude_vectors: false
169168
includes: nested.emb
170169
sort: ["name"]
171170

172171
- match: { hits.hits.0._id: "1"}
173172
- length: { hits.hits.0._source: 0}
174173

175174
- match: { hits.hits.1._id: "2"}
175+
- length: { hits.hits.1._source: 1 }
176+
- length: { hits.hits.1._source.nested: 3 }
177+
- exists: hits.hits.1._source.nested.0.emb
178+
- not_exists: hits.hits.1._source.nested.0.paragraph_id
179+
- exists: hits.hits.1._source.nested.1.emb
180+
- not_exists: hits.hits.1._source.nested.1.paragraph_id
181+
- exists: hits.hits.1._source.nested.2.emb
182+
- not_exists: hits.hits.1._source.nested.2.paragraph_id
183+
184+
- match: { hits.hits.2._id: "3" }
185+
- length: { hits.hits.2._source: 0}
186+
187+
- match: { hits.hits.3._id: "4" }
176188
- length: { hits.hits.3._source: 1 }
189+
- length: { hits.hits.3._source.nested: 2 }
190+
- exists: hits.hits.3._source.nested.0.emb
191+
- length: { hits.hits.3._source.nested.0.emb: 3 }
192+
- not_exists: hits.hits.3._source.nested.0.paragraph_id
193+
- exists: hits.hits.3._source.nested.1.emb
194+
- length: { hits.hits.3._source.nested.1.emb: 1 }
195+
- not_exists: hits.hits.3._source.nested.1.paragraph_id
196+
197+
- do:
198+
search:
199+
index: test
200+
body:
201+
_source:
202+
exclude_vectors: true
203+
includes: nested.emb
204+
sort: ["name"]
205+
206+
- match: { hits.hits.0._id: "1"}
207+
- length: { hits.hits.0._source: 0}
208+
209+
- match: { hits.hits.1._id: "2"}
210+
- length: { hits.hits.1._source: 1 }
177211
- length: { hits.hits.1._source.nested: 3 }
178212
- exists: hits.hits.1._source.nested.0.emb
179213
- not_exists: hits.hits.1._source.nested.0.paragraph_id
@@ -195,6 +229,27 @@ setup:
195229
- length: { hits.hits.3._source.nested.1.emb: 1 }
196230
- not_exists: hits.hits.3._source.nested.1.paragraph_id
197231

232+
- do:
233+
search:
234+
index: test
235+
body:
236+
_source:
237+
exclude_vectors: true
238+
includes: nested.emb
239+
excludes: nested*
240+
sort: ["name"]
241+
242+
- match: { hits.hits.0._id: "1"}
243+
- length: { hits.hits.0._source: 0}
244+
245+
- match: { hits.hits.1._id: "2"}
246+
- length: { hits.hits.1._source: 0 }
247+
- match: { hits.hits.2._id: "3" }
248+
- length: { hits.hits.2._source: 0}
249+
250+
- match: { hits.hits.3._id: "4" }
251+
- length: { hits.hits.3._source: 0 }
252+
198253
- do:
199254
headers:
200255
# Force JSON content type so that we use a parser that interprets the embeddings as doubles

server/src/main/java/org/elasticsearch/index/get/ShardGetService.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -458,8 +458,14 @@ public static Tuple<FetchSourceContext, SourceFilter> maybeExcludeVectorFields(
458458
)
459459
: null;
460460

461+
SourceFilter filter = fetchSourceContext != null ? fetchSourceContext.filter() : null;
462+
461463
List<String> lateExcludes = new ArrayList<>();
462464
var excludes = mappingLookup.getFullNameToFieldType().values().stream().filter(MappedFieldType::isVectorEmbedding).filter(f -> {
465+
// Keep the vector fields that are explicitly included and not explicitly excluded
466+
if (filter != null && filter.isExplicitlyIncluded(f.name())) {
467+
return filter.isPathFiltered(f.name(), false);
468+
}
463469
// Exclude the field specified by the `fields` option
464470
if (fetchFieldsAut != null && fetchFieldsAut.run(f.name())) {
465471
lateExcludes.add(f.name());

server/src/main/java/org/elasticsearch/search/lookup/SourceFilter.java

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,26 @@ public String[] getExcludes() {
6868
return excludes;
6969
}
7070

71+
/**
72+
* Checks if the given path matches at least one explicitly defined include pattern.
73+
* <p>
74+
* If no include patterns are defined, this method always returns {@code false}.
75+
*
76+
* @param fullPath the full path to evaluate
77+
* @return {@code true} if the path matches any explicitly defined include pattern,
78+
* {@code false} otherwise
79+
*/
80+
public boolean isExplicitlyIncluded(String fullPath) {
81+
if (includes.length == 0) {
82+
return false;
83+
}
84+
if (includeAut == null) {
85+
includeAut = XContentMapValues.compileAutomaton(includes, new CharacterRunAutomaton(Automata.makeAnyString()));
86+
}
87+
int state = step(includeAut, fullPath, 0);
88+
return state != -1 && includeAut.isAccept(state);
89+
}
90+
7191
/**
7292
* Determines whether the given full path should be filtered out.
7393
*
@@ -77,7 +97,7 @@ public String[] getExcludes() {
7797
*/
7898
public boolean isPathFiltered(String fullPath, boolean isObject) {
7999
final boolean included;
80-
if (includes != null) {
100+
if (includes.length > 0) {
81101
if (includeAut == null) {
82102
includeAut = XContentMapValues.compileAutomaton(includes, new CharacterRunAutomaton(Automata.makeAnyString()));
83103
}
@@ -87,7 +107,7 @@ public boolean isPathFiltered(String fullPath, boolean isObject) {
87107
included = true;
88108
}
89109

90-
if (excludes != null) {
110+
if (excludes.length > 0) {
91111
if (excludeAut == null) {
92112
excludeAut = XContentMapValues.compileAutomaton(excludes, new CharacterRunAutomaton(Automata.makeEmpty()));
93113
}

server/src/test/java/org/elasticsearch/search/lookup/SourceFilterTests.java

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,4 +156,54 @@ public void testIncludeParentAndExcludeChildSubFieldsObjects() {
156156
Source filteredBytes = fromBytes.filter(new SourceFilter(new String[] { "myObject" }, new String[] { "myObject.myField" }));
157157
assertEquals(filteredBytes.source(), Map.of("myObject", Map.of("other", "otherValue")));
158158
}
159+
160+
public void testIsExplicitlyIncluded() {
161+
var filter = new SourceFilter(null, null);
162+
assertFalse(filter.isExplicitlyIncluded("foo"));
163+
164+
filter = new SourceFilter(new String[] {}, null);
165+
assertFalse(filter.isExplicitlyIncluded("foo"));
166+
167+
filter = new SourceFilter(new String[] { "foo", "bar.*" }, null);
168+
assertTrue(filter.isExplicitlyIncluded("foo"));
169+
assertTrue(filter.isExplicitlyIncluded("bar.field"));
170+
assertFalse(filter.isExplicitlyIncluded("baz"));
171+
assertFalse(filter.isExplicitlyIncluded("bar"));
172+
}
173+
174+
public void testIsPathFilteredWithExcludes() {
175+
var filter = new SourceFilter(null, new String[] { "foo", "bar.field" });
176+
assertTrue(filter.isPathFiltered("foo", true));
177+
assertTrue(filter.isPathFiltered("foo", false));
178+
179+
assertTrue(filter.isPathFiltered("bar.field", false));
180+
assertFalse(filter.isPathFiltered("baz", false));
181+
assertFalse(filter.isPathFiltered("bar", false));
182+
assertFalse(filter.isPathFiltered("bar", true));
183+
}
184+
185+
public void testIsPathFilteredWithIncludes() {
186+
var filter = new SourceFilter(new String[] { "foo", "bar.field" }, null);
187+
assertFalse(filter.isPathFiltered("foo", true));
188+
assertFalse(filter.isPathFiltered("foo", false));
189+
190+
assertFalse(filter.isPathFiltered("bar.field", false));
191+
assertTrue(filter.isPathFiltered("baz", false));
192+
assertTrue(filter.isPathFiltered("bar", false));
193+
assertFalse(filter.isPathFiltered("bar", true));
194+
}
195+
196+
public void testIsPathFilteredWithIncludesAndExcludes() {
197+
var filter = new SourceFilter(new String[] { "foo", "bar.*", "nested.field" }, new String[] { "foo", "bar.field" });
198+
assertTrue(filter.isPathFiltered("foo", true));
199+
assertTrue(filter.isPathFiltered("foo", false));
200+
201+
assertTrue(filter.isPathFiltered("bar.field", false));
202+
assertTrue(filter.isPathFiltered("baz", false));
203+
assertTrue(filter.isPathFiltered("bar", false));
204+
assertFalse(filter.isPathFiltered("bar", true));
205+
206+
assertFalse(filter.isPathFiltered("nested.field", false));
207+
assertTrue(filter.isPathFiltered("nested.another", false));
208+
}
159209
}

x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/rank_vectors/rank_vectors_synthetic_vectors.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,6 @@ setup:
154154
index: test
155155
body:
156156
_source:
157-
exclude_vectors: false
158157
includes: nested.vector
159158
sort: ["name"]
160159

0 commit comments

Comments
 (0)