Skip to content

Commit 6e67fac

Browse files
authored
Add option to include or exclude vectors from _source retrieval (#128735)
This PR introduces a new exclude_vectors option to the _source retrieval context. When set to true, vectors are excluded from the returned _source. This is especially efficient when used with synthetic source, as it avoids loading vector fields entirely. By default, vectors remain included unless explicitly excluded.
1 parent 53f3ab2 commit 6e67fac

File tree

12 files changed

+425
-18
lines changed

12 files changed

+425
-18
lines changed

docs/changelog/128735.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 128735
2+
summary: Add option to include or exclude vectors from `_source` retrieval
3+
area: Vector Search
4+
type: feature
5+
issues: []
Lines changed: 225 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,225 @@
1+
setup:
2+
- requires:
3+
reason: 'exclude_vectors option is required'
4+
test_runner_features: [ capabilities ]
5+
capabilities:
6+
- method: GET
7+
path: /_search
8+
capabilities: [ exclude_vectors_param ]
9+
- skip:
10+
features: "headers"
11+
12+
- do:
13+
indices.create:
14+
index: test
15+
body:
16+
mappings:
17+
properties:
18+
name:
19+
type: keyword
20+
sparse_vector:
21+
type: sparse_vector
22+
vector:
23+
type: dense_vector
24+
dims: 5
25+
similarity: l2_norm
26+
27+
nested:
28+
type: nested
29+
properties:
30+
paragraph_id:
31+
type: keyword
32+
vector:
33+
type: dense_vector
34+
dims: 5
35+
similarity: l2_norm
36+
sparse_vector:
37+
type: sparse_vector
38+
39+
- do:
40+
index:
41+
index: test
42+
id: "1"
43+
body:
44+
name: cow.jpg
45+
vector: [36, 267, -311, 12, -202]
46+
47+
- do:
48+
index:
49+
index: test
50+
id: "2"
51+
body:
52+
name: moose.jpg
53+
nested:
54+
- paragraph_id: 0
55+
vector: [-0.5, 100.0, -13, 14.8, -156.0]
56+
- paragraph_id: 2
57+
vector: [0, 100.0, 0, 14.8, -156.0]
58+
- paragraph_id: 3
59+
vector: [0, 1.0, 0, 1.8, -15.0]
60+
61+
- do:
62+
index:
63+
index: test
64+
id: "3"
65+
body:
66+
name: rabbit.jpg
67+
vector: [-0.5, 100.0, -13, 14.8, -156.0]
68+
sparse_vector:
69+
running: 3
70+
good: 17
71+
run: 22
72+
73+
- do:
74+
index:
75+
index: test
76+
id: "4"
77+
body:
78+
name: zoolander.jpg
79+
nested:
80+
- paragraph_id: 0
81+
vector: [ -0.5, 100.0, -13, 14.8, -156.0 ]
82+
sparse_vector:
83+
running: 3
84+
good: 17
85+
run: 22
86+
- paragraph_id: 1
87+
sparse_vector:
88+
modeling: 32
89+
model: 20
90+
mode: 54
91+
- paragraph_id: 2
92+
vector: [ -9.8, 109, 32, 14.8, 23 ]
93+
94+
95+
- do:
96+
indices.refresh: {}
97+
98+
---
99+
"exclude vectors":
100+
- do:
101+
search:
102+
index: test
103+
body:
104+
_source:
105+
exclude_vectors: true
106+
sort: ["name"]
107+
108+
- match: { hits.hits.0._id: "1"}
109+
- match: { hits.hits.0._source.name: "cow.jpg"}
110+
- not_exists: hits.hits.0._source.vector
111+
112+
- match: { hits.hits.1._id: "2"}
113+
- match: { hits.hits.1._source.name: "moose.jpg"}
114+
- length: { hits.hits.1._source.nested: 3 }
115+
- not_exists: hits.hits.1._source.nested.0.vector
116+
- match: { hits.hits.1._source.nested.0.paragraph_id: 0 }
117+
- not_exists: hits.hits.1._source.nested.1.vector
118+
- match: { hits.hits.1._source.nested.1.paragraph_id: 2 }
119+
- not_exists: hits.hits.1._source.nested.2.vector
120+
- match: { hits.hits.1._source.nested.2.paragraph_id: 3 }
121+
122+
- match: { hits.hits.2._id: "3" }
123+
- match: { hits.hits.2._source.name: "rabbit.jpg" }
124+
- not_exists: hits.hits.2._source.vector
125+
- not_exists: hits.hits.2._source.sparse_vector
126+
127+
- match: { hits.hits.3._id: "4" }
128+
- match: { hits.hits.3._source.name: "zoolander.jpg" }
129+
- length: { hits.hits.3._source.nested: 3 }
130+
- not_exists: hits.hits.3._source.nested.0.vector
131+
- not_exists: hits.hits.3._source.nested.0.sparse_vector
132+
- match: { hits.hits.3._source.nested.0.paragraph_id: 0 }
133+
- not_exists: hits.hits.3._source.nested.1.sparse_vector
134+
- match: { hits.hits.3._source.nested.1.paragraph_id: 1 }
135+
- not_exists: hits.hits.3._source.nested.2.vector
136+
- match: { hits.hits.3._source.nested.2.paragraph_id: 2 }
137+
138+
---
139+
"include vectors":
140+
- do:
141+
search:
142+
index: test
143+
body:
144+
_source:
145+
exclude_vectors: false
146+
sort: ["name"]
147+
148+
- match: { hits.hits.0._id: "1"}
149+
- match: { hits.hits.0._source.name: "cow.jpg"}
150+
- exists: hits.hits.0._source.vector
151+
152+
- match: { hits.hits.1._id: "2"}
153+
- match: { hits.hits.1._source.name: "moose.jpg"}
154+
- length: { hits.hits.1._source.nested: 3 }
155+
- exists: hits.hits.1._source.nested.0.vector
156+
- match: { hits.hits.1._source.nested.0.paragraph_id: 0 }
157+
- exists: hits.hits.1._source.nested.1.vector
158+
- match: { hits.hits.1._source.nested.1.paragraph_id: 2 }
159+
- exists: hits.hits.1._source.nested.2.vector
160+
- match: { hits.hits.1._source.nested.2.paragraph_id: 3 }
161+
162+
- match: { hits.hits.2._id: "3" }
163+
- match: { hits.hits.2._source.name: "rabbit.jpg" }
164+
- exists: hits.hits.2._source.vector
165+
- exists: hits.hits.2._source.sparse_vector
166+
167+
- match: { hits.hits.3._id: "4" }
168+
- match: { hits.hits.3._source.name: "zoolander.jpg" }
169+
- length: { hits.hits.3._source.nested: 3 }
170+
- exists: hits.hits.3._source.nested.0.vector
171+
- exists: hits.hits.3._source.nested.0.sparse_vector
172+
- match: { hits.hits.3._source.nested.0.paragraph_id: 0 }
173+
- exists: hits.hits.3._source.nested.1.sparse_vector
174+
- match: { hits.hits.3._source.nested.1.paragraph_id: 1 }
175+
- exists: hits.hits.3._source.nested.2.vector
176+
- match: { hits.hits.3._source.nested.2.paragraph_id: 2 }
177+
178+
---
179+
"exclude vectors with fields":
180+
- do:
181+
search:
182+
index: test
183+
body:
184+
_source:
185+
exclude_vectors: true
186+
sort: ["name"]
187+
fields: [vector, sparse_vector, nested.*]
188+
189+
- match: { hits.hits.0._id: "1"}
190+
- match: { hits.hits.0._source.name: "cow.jpg"}
191+
- not_exists: hits.hits.0._source.vector
192+
- exists: hits.hits.0.fields.vector
193+
194+
- match: { hits.hits.1._id: "2"}
195+
- match: { hits.hits.1._source.name: "moose.jpg"}
196+
- length: { hits.hits.1._source.nested: 3 }
197+
- not_exists: hits.hits.1._source.nested.0.vector
198+
- match: { hits.hits.1._source.nested.0.paragraph_id: 0 }
199+
- not_exists: hits.hits.1._source.nested.1.vector
200+
- match: { hits.hits.1._source.nested.1.paragraph_id: 2 }
201+
- not_exists: hits.hits.1._source.nested.2.vector
202+
- match: { hits.hits.1._source.nested.2.paragraph_id: 3 }
203+
204+
- match: { hits.hits.2._id: "3" }
205+
- match: { hits.hits.2._source.name: "rabbit.jpg" }
206+
- not_exists: hits.hits.2._source.vector
207+
- exists: hits.hits.2.fields.vector
208+
- not_exists: hits.hits.2._source.sparse_vector
209+
- exists: hits.hits.2.fields.sparse_vector
210+
211+
212+
- match: { hits.hits.3._id: "4" }
213+
- match: { hits.hits.3._source.name: "zoolander.jpg" }
214+
- length: { hits.hits.3._source.nested: 3 }
215+
- not_exists: hits.hits.3._source.nested.0.vector
216+
- exists: hits.hits.3.fields.nested.0.vector
217+
- not_exists: hits.hits.3._source.nested.0.sparse_vector
218+
- match: { hits.hits.3._source.nested.0.paragraph_id: 0 }
219+
- exists: hits.hits.3.fields.nested.0.sparse_vector
220+
- not_exists: hits.hits.3._source.nested.1.sparse_vector
221+
- match: { hits.hits.3._source.nested.1.paragraph_id: 1 }
222+
- exists: hits.hits.3.fields.nested.1.sparse_vector
223+
- not_exists: hits.hits.3._source.nested.2.vector
224+
- match: { hits.hits.3._source.nested.2.paragraph_id: 2 }
225+
- exists: hits.hits.3.fields.nested.2.vector

server/src/main/java/org/elasticsearch/TransportVersions.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,7 @@ static TransportVersion def(int id) {
191191
public static final TransportVersion ILM_ADD_SKIP_SETTING_8_19 = def(8_841_0_43);
192192
public static final TransportVersion ESQL_REGEX_MATCH_WITH_CASE_INSENSITIVITY_8_19 = def(8_841_0_44);
193193
public static final TransportVersion ESQL_QUERY_PLANNING_DURATION_8_19 = def(8_841_0_45);
194+
public static final TransportVersion SEARCH_SOURCE_EXCLUDE_VECTORS_PARAM_8_19 = def(8_841_0_46);
194195
public static final TransportVersion V_9_0_0 = def(9_000_0_09);
195196
public static final TransportVersion INITIAL_ELASTICSEARCH_9_0_1 = def(9_000_0_10);
196197
public static final TransportVersion INITIAL_ELASTICSEARCH_9_0_2 = def(9_000_0_11);
@@ -286,7 +287,7 @@ static TransportVersion def(int id) {
286287
public static final TransportVersion ILM_ADD_SKIP_SETTING = def(9_089_0_00);
287288
public static final TransportVersion ML_INFERENCE_MISTRAL_CHAT_COMPLETION_ADDED = def(9_090_0_00);
288289
public static final TransportVersion IDP_CUSTOM_SAML_ATTRIBUTES_ALLOW_LIST = def(9_091_0_00);
289-
290+
public static final TransportVersion SEARCH_SOURCE_EXCLUDE_VECTORS_PARAM = def(9_092_0_00);
290291
/*
291292
* STOP! READ THIS FIRST! No, really,
292293
* ____ _____ ___ ____ _ ____ _____ _ ____ _____ _ _ ___ ____ _____ ___ ____ ____ _____ _

server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,15 @@ public boolean isDimension() {
195195
return false;
196196
}
197197

198+
/**
199+
* Vector embeddings are typically large and not intended for human consumption, so such fields may be excluded from responses.
200+
*
201+
* @return true if this field contains vector embeddings.
202+
*/
203+
public boolean isVectorEmbedding() {
204+
return false;
205+
}
206+
198207
/**
199208
* @return true if field has script values.
200209
*/

server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2303,6 +2303,11 @@ public boolean isAggregatable() {
23032303
return false;
23042304
}
23052305

2306+
@Override
2307+
public boolean isVectorEmbedding() {
2308+
return true;
2309+
}
2310+
23062311
@Override
23072312
public IndexFieldData.Builder fielddataBuilder(FieldDataContext fieldDataContext) {
23082313
return elementType.fielddataBuilder(this, fieldDataContext);

server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,11 @@ public String typeName() {
121121
return CONTENT_TYPE;
122122
}
123123

124+
@Override
125+
public boolean isVectorEmbedding() {
126+
return true;
127+
}
128+
124129
@Override
125130
public IndexFieldData.Builder fielddataBuilder(FieldDataContext fieldDataContext) {
126131
throw new IllegalArgumentException("[sparse_vector] fields do not support sorting, scripting or aggregating");

server/src/main/java/org/elasticsearch/rest/action/search/SearchCapabilities.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,8 @@ private SearchCapabilities() {}
4949
private static final String INDEX_SELECTOR_SYNTAX = "index_expression_selectors";
5050

5151
private static final String SIGNIFICANT_TERMS_BACKGROUND_FILTER_AS_SUB = "significant_terms_background_filter_as_sub";
52-
5352
private static final String SIGNIFICANT_TERMS_ON_NESTED_FIELDS = "significant_terms_on_nested_fields";
53+
private static final String EXCLUDE_VECTORS_PARAM = "exclude_vectors_param";
5454

5555
public static final Set<String> CAPABILITIES;
5656
static {
@@ -72,6 +72,7 @@ private SearchCapabilities() {}
7272
capabilities.add(INDEX_SELECTOR_SYNTAX);
7373
capabilities.add(SIGNIFICANT_TERMS_BACKGROUND_FILTER_AS_SUB);
7474
capabilities.add(SIGNIFICANT_TERMS_ON_NESTED_FIELDS);
75+
capabilities.add(EXCLUDE_VECTORS_PARAM);
7576
CAPABILITIES = Set.copyOf(capabilities);
7677
}
7778
}

server/src/main/java/org/elasticsearch/search/fetch/FetchContext.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,9 @@ private static FetchSourceContext buildFetchSourceContext(SearchContext in) {
6868
if (sfc != null && sfc.fetchFields()) {
6969
for (String field : sfc.fieldNames()) {
7070
if (SourceFieldMapper.NAME.equals(field)) {
71-
fsc = fsc == null ? FetchSourceContext.of(true) : FetchSourceContext.of(true, fsc.includes(), fsc.excludes());
71+
fsc = fsc == null
72+
? FetchSourceContext.of(true)
73+
: FetchSourceContext.of(true, fsc.excludeVectors(), fsc.includes(), fsc.excludes());
7274
}
7375
}
7476
}

0 commit comments

Comments
 (0)