Skip to content
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/128735.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 128735
summary: Add option to include or exclude vectors from `_source` retrieval
area: Vector Search
type: feature
issues: []
Original file line number Diff line number Diff line change
@@ -0,0 +1,225 @@
# Shared fixture for the tests in this file: gates on the search capability
# `include_vectors_param`, creates index `test`, indexes four documents and refreshes.
setup:
- requires:
reason: 'include_vectors option is required'
test_runner_features: [ capabilities ]
capabilities:
- method: GET
path: /_search
capabilities: [ include_vectors_param ]
# NOTE(review): no request visible in this file sends custom headers — confirm this feature gate is needed.
- skip:
features: "headers"

# Mapping: a keyword `name`, a `sparse_vector` and a 5-dimension l2_norm `dense_vector`,
# plus a `nested` object carrying `paragraph_id` and the same two vector field types.
- do:
indices.create:
index: test
body:
mappings:
properties:
name:
type: keyword
sparse_vector:
type: sparse_vector
vector:
type: dense_vector
dims: 5
similarity: l2_norm

nested:
type: nested
properties:
paragraph_id:
type: keyword
vector:
type: dense_vector
dims: 5
similarity: l2_norm
sparse_vector:
type: sparse_vector

# Document 1: name and a dense vector only.
- do:
index:
index: test
id: "1"
body:
name: cow.jpg
vector: [36, 267, -311, 12, -202]

# Document 2: three nested paragraphs, each with a dense vector.
- do:
index:
index: test
id: "2"
body:
name: moose.jpg
nested:
- paragraph_id: 0
vector: [-0.5, 100.0, -13, 14.8, -156.0]
- paragraph_id: 2
vector: [0, 100.0, 0, 14.8, -156.0]
- paragraph_id: 3
vector: [0, 1.0, 0, 1.8, -15.0]

# Document 3: both a dense vector and a sparse vector.
- do:
index:
index: test
id: "3"
body:
name: rabbit.jpg
vector: [-0.5, 100.0, -13, 14.8, -156.0]
sparse_vector:
running: 3
good: 17
run: 22

# Document 4: nested paragraphs mixing vector kinds — paragraph 0 has dense and sparse,
# paragraph 1 has sparse only, paragraph 2 has dense only.
- do:
index:
index: test
id: "4"
body:
name: zoolander.jpg
nested:
- paragraph_id: 0
vector: [ -0.5, 100.0, -13, 14.8, -156.0 ]
sparse_vector:
running: 3
good: 17
run: 22
- paragraph_id: 1
sparse_vector:
modeling: 32
model: 20
mode: 54
- paragraph_id: 2
vector: [ -9.8, 109, 32, 14.8, 23 ]


# Refresh before the tests run their searches.
- do:
indices.refresh: {}

---
# Searches with `_source.include_vectors: false` and asserts that every dense and sparse
# vector field (top-level and nested) is absent from `_source`, while `name`,
# `paragraph_id` and the nested array length are unchanged.
# Hits are sorted by `name`, so they are expected in the order of ids 1, 2, 3, 4.
"exclude vectors":
- do:
search:
index: test
body:
_source:
include_vectors: false
sort: ["name"]

- match: { hits.hits.0._id: "1"}
- match: { hits.hits.0._source.name: "cow.jpg"}
- not_exists: hits.hits.0._source.vector

- match: { hits.hits.1._id: "2"}
- match: { hits.hits.1._source.name: "moose.jpg"}
- length: { hits.hits.1._source.nested: 3 }
- not_exists: hits.hits.1._source.nested.0.vector
- match: { hits.hits.1._source.nested.0.paragraph_id: 0 }
- not_exists: hits.hits.1._source.nested.1.vector
- match: { hits.hits.1._source.nested.1.paragraph_id: 2 }
- not_exists: hits.hits.1._source.nested.2.vector
- match: { hits.hits.1._source.nested.2.paragraph_id: 3 }

- match: { hits.hits.2._id: "3" }
- match: { hits.hits.2._source.name: "rabbit.jpg" }
- not_exists: hits.hits.2._source.vector
- not_exists: hits.hits.2._source.sparse_vector

- match: { hits.hits.3._id: "4" }
- match: { hits.hits.3._source.name: "zoolander.jpg" }
- length: { hits.hits.3._source.nested: 3 }
- not_exists: hits.hits.3._source.nested.0.vector
- not_exists: hits.hits.3._source.nested.0.sparse_vector
- match: { hits.hits.3._source.nested.0.paragraph_id: 0 }
- not_exists: hits.hits.3._source.nested.1.sparse_vector
- match: { hits.hits.3._source.nested.1.paragraph_id: 1 }
- not_exists: hits.hits.3._source.nested.2.vector
- match: { hits.hits.3._source.nested.2.paragraph_id: 2 }

---
# Searches with `_source.include_vectors: true` and asserts that every indexed dense and
# sparse vector field (top-level and nested) is present in `_source` alongside the
# non-vector fields.
# Hits are sorted by `name`, so they are expected in the order of ids 1, 2, 3, 4.
"include vectors":
- do:
search:
index: test
body:
_source:
include_vectors: true
sort: ["name"]

- match: { hits.hits.0._id: "1"}
- match: { hits.hits.0._source.name: "cow.jpg"}
- exists: hits.hits.0._source.vector

- match: { hits.hits.1._id: "2"}
- match: { hits.hits.1._source.name: "moose.jpg"}
- length: { hits.hits.1._source.nested: 3 }
- exists: hits.hits.1._source.nested.0.vector
- match: { hits.hits.1._source.nested.0.paragraph_id: 0 }
- exists: hits.hits.1._source.nested.1.vector
- match: { hits.hits.1._source.nested.1.paragraph_id: 2 }
- exists: hits.hits.1._source.nested.2.vector
- match: { hits.hits.1._source.nested.2.paragraph_id: 3 }

- match: { hits.hits.2._id: "3" }
- match: { hits.hits.2._source.name: "rabbit.jpg" }
- exists: hits.hits.2._source.vector
- exists: hits.hits.2._source.sparse_vector

- match: { hits.hits.3._id: "4" }
- match: { hits.hits.3._source.name: "zoolander.jpg" }
- length: { hits.hits.3._source.nested: 3 }
- exists: hits.hits.3._source.nested.0.vector
- exists: hits.hits.3._source.nested.0.sparse_vector
- match: { hits.hits.3._source.nested.0.paragraph_id: 0 }
- exists: hits.hits.3._source.nested.1.sparse_vector
- match: { hits.hits.3._source.nested.1.paragraph_id: 1 }
- exists: hits.hits.3._source.nested.2.vector
- match: { hits.hits.3._source.nested.2.paragraph_id: 2 }

---
# Combines `_source.include_vectors: false` with an explicit `fields` request for
# `vector`, `sparse_vector` and `nested.*`. Asserts that the vectors are absent from
# `_source` but are still returned under each hit's `fields` section.
# Hits are sorted by `name`, so they are expected in the order of ids 1, 2, 3, 4.
"exclude vectors with fields":
- do:
search:
index: test
body:
_source:
include_vectors: false
sort: ["name"]
fields: [vector, sparse_vector, nested.*]

- match: { hits.hits.0._id: "1"}
- match: { hits.hits.0._source.name: "cow.jpg"}
- not_exists: hits.hits.0._source.vector
- exists: hits.hits.0.fields.vector

- match: { hits.hits.1._id: "2"}
- match: { hits.hits.1._source.name: "moose.jpg"}
- length: { hits.hits.1._source.nested: 3 }
- not_exists: hits.hits.1._source.nested.0.vector
- match: { hits.hits.1._source.nested.0.paragraph_id: 0 }
- not_exists: hits.hits.1._source.nested.1.vector
- match: { hits.hits.1._source.nested.1.paragraph_id: 2 }
- not_exists: hits.hits.1._source.nested.2.vector
- match: { hits.hits.1._source.nested.2.paragraph_id: 3 }

- match: { hits.hits.2._id: "3" }
- match: { hits.hits.2._source.name: "rabbit.jpg" }
- not_exists: hits.hits.2._source.vector
- exists: hits.hits.2.fields.vector
- not_exists: hits.hits.2._source.sparse_vector
- exists: hits.hits.2.fields.sparse_vector


- match: { hits.hits.3._id: "4" }
- match: { hits.hits.3._source.name: "zoolander.jpg" }
- length: { hits.hits.3._source.nested: 3 }
- not_exists: hits.hits.3._source.nested.0.vector
- exists: hits.hits.3.fields.nested.0.vector
- not_exists: hits.hits.3._source.nested.0.sparse_vector
- match: { hits.hits.3._source.nested.0.paragraph_id: 0 }
- exists: hits.hits.3.fields.nested.0.sparse_vector
- not_exists: hits.hits.3._source.nested.1.sparse_vector
- match: { hits.hits.3._source.nested.1.paragraph_id: 1 }
- exists: hits.hits.3.fields.nested.1.sparse_vector
- not_exists: hits.hits.3._source.nested.2.vector
- match: { hits.hits.3._source.nested.2.paragraph_id: 2 }
- exists: hits.hits.3.fields.nested.2.vector
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,7 @@ static TransportVersion def(int id) {
public static final TransportVersion JOIN_ON_ALIASES_8_19 = def(8_841_0_42);
public static final TransportVersion ILM_ADD_SKIP_SETTING_8_19 = def(8_841_0_43);
public static final TransportVersion ESQL_REGEX_MATCH_WITH_CASE_INSENSITIVITY_8_19 = def(8_841_0_44);
public static final TransportVersion SEARCH_SOURCE_INCLUDE_VECTORS_PARAM_8_19 = def(8_841_0_45);
public static final TransportVersion V_9_0_0 = def(9_000_0_09);
public static final TransportVersion INITIAL_ELASTICSEARCH_9_0_1 = def(9_000_0_10);
public static final TransportVersion INITIAL_ELASTICSEARCH_9_0_2 = def(9_000_0_11);
Expand Down Expand Up @@ -285,7 +286,7 @@ static TransportVersion def(int id) {
public static final TransportVersion ILM_ADD_SKIP_SETTING = def(9_089_0_00);
public static final TransportVersion ML_INFERENCE_MISTRAL_CHAT_COMPLETION_ADDED = def(9_090_0_00);
public static final TransportVersion IDP_CUSTOM_SAML_ATTRIBUTES_ALLOW_LIST = def(9_091_0_00);

public static final TransportVersion SEARCH_SOURCE_INCLUDE_VECTORS_PARAM = def(9_092_0_00);
/*
* STOP! READ THIS FIRST! No, really,
* ____ _____ ___ ____ _ ____ _____ _ ____ _____ _ _ ___ ____ _____ ___ ____ ____ _____ _
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,15 @@ public boolean isDimension() {
return false;
}

/**
* Reports whether this field holds vector embeddings.
* <p>
* Vector embeddings are typically large and not intended for human consumption, so such fields may be excluded from responses.
* This implementation always returns {@code false}.
*
* @return {@code true} if this field contains vector embeddings, {@code false} otherwise.
*/
public boolean isVectorEmbedding() {
return false;
}

/**
* @return true if field has script values.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
import org.elasticsearch.index.mapper.BlockDocValuesReader;
import org.elasticsearch.index.mapper.BlockLoader;
import org.elasticsearch.index.mapper.BlockSourceReader;
import org.elasticsearch.index.mapper.DocValueFetcher;
import org.elasticsearch.index.mapper.DocumentParserContext;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
Expand All @@ -75,6 +76,7 @@
import org.elasticsearch.index.query.SearchExecutionContext;
import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.aggregations.support.CoreValuesSourceType;
import org.elasticsearch.search.fetch.StoredFieldsSpec;
import org.elasticsearch.search.lookup.Source;
import org.elasticsearch.search.vectors.DenseVectorQuery;
import org.elasticsearch.search.vectors.ESDiversifyingChildrenByteKnnVectorQuery;
Expand Down Expand Up @@ -2285,6 +2287,13 @@ public ValueFetcher valueFetcher(SearchExecutionContext context, String format)
if (format != null) {
throw new IllegalArgumentException("Field [" + name() + "] of type [" + typeName() + "] doesn't support formats.");
}
if (context.getMappingLookup().isSourceSynthetic()) {
return new DocValueFetcher(
docValueFormat(null, null),
context.getForField(this, FielddataOperation.SEARCH),
StoredFieldsSpec.NO_REQUIREMENTS
);
}
return new ArraySourceValueFetcher(name(), context) {
@Override
protected Object parseSourceValue(Object value) {
Expand All @@ -2303,6 +2312,11 @@ public boolean isAggregatable() {
return false;
}

/**
* Marks this field type as holding vector embeddings.
*
* @return always {@code true}.
*/
@Override
public boolean isVectorEmbedding() {
return true;
}

@Override
public IndexFieldData.Builder fielddataBuilder(FieldDataContext fieldDataContext) {
return elementType.fielddataBuilder(this, fieldDataContext);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,11 @@ public String typeName() {
return CONTENT_TYPE;
}

/**
* Marks this field type as holding vector embeddings.
*
* @return always {@code true}.
*/
@Override
public boolean isVectorEmbedding() {
return true;
}

@Override
public IndexFieldData.Builder fielddataBuilder(FieldDataContext fieldDataContext) {
throw new IllegalArgumentException("[sparse_vector] fields do not support sorting, scripting or aggregating");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,8 @@ private SearchCapabilities() {}
private static final String INDEX_SELECTOR_SYNTAX = "index_expression_selectors";

private static final String SIGNIFICANT_TERMS_BACKGROUND_FILTER_AS_SUB = "significant_terms_background_filter_as_sub";

private static final String SIGNIFICANT_TERMS_ON_NESTED_FIELDS = "significant_terms_on_nested_fields";
private static final String INCLUDE_VECTORS_PARAM = "include_vectors_param";

public static final Set<String> CAPABILITIES;
static {
Expand All @@ -72,6 +72,7 @@ private SearchCapabilities() {}
capabilities.add(INDEX_SELECTOR_SYNTAX);
capabilities.add(SIGNIFICANT_TERMS_BACKGROUND_FILTER_AS_SUB);
capabilities.add(SIGNIFICANT_TERMS_ON_NESTED_FIELDS);
capabilities.add(INCLUDE_VECTORS_PARAM);
CAPABILITIES = Set.copyOf(capabilities);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,9 @@ private static FetchSourceContext buildFetchSourceContext(SearchContext in) {
if (sfc != null && sfc.fetchFields()) {
for (String field : sfc.fieldNames()) {
if (SourceFieldMapper.NAME.equals(field)) {
fsc = fsc == null ? FetchSourceContext.of(true) : FetchSourceContext.of(true, fsc.includes(), fsc.excludes());
fsc = fsc == null
? FetchSourceContext.of(true)
: FetchSourceContext.of(true, fsc.includeVectors(), fsc.includes(), fsc.excludes());
}
}
}
Expand Down
Loading
Loading