diff --git a/docs/changelog/131907.yaml b/docs/changelog/131907.yaml new file mode 100644 index 0000000000000..6e8db5dbb272f --- /dev/null +++ b/docs/changelog/131907.yaml @@ -0,0 +1,26 @@ +pr: 131907 +summary: Enable `exclude_source_vectors` by default for new indices +area: Vector Search +type: breaking +issues: [] +breaking: + title: Enable `exclude_source_vectors` by default for new indices + area: Search + details: |- + The `exclude_source_vectors` setting is now enabled by default for newly created indices. + This means that vector fields (e.g., `dense_vector`) are no longer stored in the `_source` field + by default, although they remain fully accessible through search and retrieval operations. + + Instead of being persisted in `_source`, vectors are now rehydrated on demand from the underlying + index structures when needed. This reduces index size and improves performance for typical vector + search workloads where the original vector values do not need to be part of the `_source`. + + If your use case requires vector fields to be stored in `_source`, you can disable this behavior by + setting `exclude_source_vectors: false` at index creation time. + impact: |- + Vector fields will no longer be stored in `_source` by default for new indices. Applications or tools + that expect to see vector fields in `_source` (for raw document inspection) + may need to be updated or configured to explicitly retain vectors using `exclude_source_vectors: false`. + + Retrieval of vector fields via search or the `_source` API remains fully supported. 
+ notable: true diff --git a/docs/reference/elasticsearch/mapping-reference/dense-vector.md b/docs/reference/elasticsearch/mapping-reference/dense-vector.md index 2cd1613118edb..0dc84889d4799 100644 --- a/docs/reference/elasticsearch/mapping-reference/dense-vector.md +++ b/docs/reference/elasticsearch/mapping-reference/dense-vector.md @@ -102,6 +102,81 @@ PUT my-index-2 {{es}} uses the [HNSW algorithm](https://arxiv.org/abs/1603.09320) to support efficient kNN search. Like most kNN algorithms, HNSW is an approximate method that sacrifices result accuracy for improved speed. +## Accessing `dense_vector` fields in search responses +```{applies_to} +stack: ga 9.2 +serverless: ga +``` + +By default, `dense_vector` fields are **not included in `_source`** in responses from the `_search`, `_msearch`, `_get`, and `_mget` APIs. +This helps reduce response size and improve performance, especially in scenarios where vectors are used solely for similarity scoring and not required in the output. + +To retrieve vector values explicitly, you can use: + +* The `fields` option to request specific vector fields directly: + +```console +POST my-index-2/_search +{ + "fields": ["my_vector"] +} +``` + +- The `_source.exclude_vectors` flag to re-enable vector inclusion in `_source` responses: + +```console +POST my-index-2/_search +{ + "_source": { + "exclude_vectors": false + } +} +``` + +### Storage behavior and `_source` + +By default, `dense_vector` fields are **not stored in `_source`** on disk. This is also controlled by the index setting `index.mapping.exclude_source_vectors`. +This setting is enabled by default for newly created indices and can only be set at index creation time. + +When enabled: + +* `dense_vector` fields are removed from `_source` and the rest of the `_source` is stored as usual. +* If a request includes `_source` and vector values are needed (e.g., during recovery or reindex), the vectors are rehydrated from their internal format. 
+ +This setting is compatible with synthetic `_source`, where the entire `_source` document is reconstructed from columnar storage. In full synthetic mode, no `_source` is stored on disk, and all fields — including vectors — are rebuilt when needed. + +### Rehydration and precision + +When vector values are rehydrated (e.g., for reindex, recovery, or explicit `_source` requests), they are restored from their internal format. Internally, vectors are stored at float precision, so if they were originally indexed as higher-precision types (e.g., `double` or `long`), the rehydrated values will have reduced precision. This lossy representation is intended to save space while preserving search quality. + +### Storing original vectors in `_source` + +If you want to preserve the original vector values exactly as they were provided, you can re-enable vector storage in `_source`: + +```console +PUT my-index-include-vectors +{ + "settings": { + "index.mapping.exclude_source_vectors": false + }, + "mappings": { + "properties": { + "my_vector": { + "type": "dense_vector" + } + } + } +} +``` + +When this setting is disabled: + +* `dense_vector` fields are stored as part of the `_source`, exactly as indexed. +* The index will store both the original `_source` value and the internal representation used for vector search, resulting in increased storage usage. +* Vectors are once again returned in `_source` by default in all relevant APIs, with no need to use `exclude_vectors` or `fields`. + +This configuration is appropriate when full source fidelity is required, such as for auditing or round-tripping exact input values. + ## Automatically quantize vectors for kNN search [dense-vector-quantization] The `dense_vector` type supports quantization to reduce the memory footprint required when [searching](docs-content://solutions/search/vector/knn.md#approximate-knn) `float` vectors. 
The three following quantization strategies are supported: @@ -266,16 +341,16 @@ $$$dense-vector-index-options$$$ `type` : (Required, string) The type of kNN algorithm to use. Can be either any of: * `hnsw` - This utilizes the [HNSW algorithm](https://arxiv.org/abs/1603.09320) for scalable approximate kNN search. This supports all `element_type` values. - * `int8_hnsw` - The default index type for some float vectors: - - * {applies_to}`stack: ga 9.1` Default for float vectors with less than 384 dimensions. + * `int8_hnsw` - The default index type for some float vectors: + + * {applies_to}`stack: ga 9.1` Default for float vectors with less than 384 dimensions. * {applies_to}`stack: ga 9.0` Default for float all vectors. - + This utilizes the [HNSW algorithm](https://arxiv.org/abs/1603.09320) in addition to automatically scalar quantization for scalable approximate kNN search with `element_type` of `float`. This can reduce the memory footprint by 4x at the cost of some accuracy. See [Automatically quantize vectors for kNN search](#dense-vector-quantization). * `int4_hnsw` - This utilizes the [HNSW algorithm](https://arxiv.org/abs/1603.09320) in addition to automatically scalar quantization for scalable approximate kNN search with `element_type` of `float`. This can reduce the memory footprint by 8x at the cost of some accuracy. See [Automatically quantize vectors for kNN search](#dense-vector-quantization). * `bbq_hnsw` - This utilizes the [HNSW algorithm](https://arxiv.org/abs/1603.09320) in addition to automatically binary quantization for scalable approximate kNN search with `element_type` of `float`. This can reduce the memory footprint by 32x at the cost of accuracy. See [Automatically quantize vectors for kNN search](#dense-vector-quantization). - - {applies_to}`stack: ga 9.1` `bbq_hnsw` is the default index type for float vectors with greater than or equal to 384 dimensions. 
+ + {applies_to}`stack: ga 9.1` `bbq_hnsw` is the default index type for float vectors with greater than or equal to 384 dimensions. * `flat` - This utilizes a brute-force search algorithm for exact kNN search. This supports all `element_type` values. * `int8_flat` - This utilizes a brute-force search algorithm in addition to automatically scalar quantization. Only supports `element_type` of `float`. * `int4_flat` - This utilizes a brute-force search algorithm in addition to automatically half-byte scalar quantization. Only supports `element_type` of `float`. @@ -295,8 +370,8 @@ $$$dense-vector-index-options$$$ : (Optional, object) An optional section that configures automatic vector rescoring on knn queries for the given field. Only applicable to quantized index types. :::::{dropdown} Properties of rescore_vector `oversample` -: (required, float) The amount to oversample the search results by. This value should be one of the following: - * Greater than `1.0` and less than `10.0` +: (required, float) The amount to oversample the search results by. This value should be one of the following: + * Greater than `1.0` and less than `10.0` * Exactly `0` to indicate no oversampling and rescoring should occur {applies_to}`stack: ga 9.1` : The higher the value, the more vectors will be gathered and rescored with the raw values per shard. : In case a knn query specifies a `rescore_vector` parameter, the query `rescore_vector` parameter will be used instead. diff --git a/docs/reference/elasticsearch/mapping-reference/rank-vectors.md b/docs/reference/elasticsearch/mapping-reference/rank-vectors.md index 20683e89b164c..2c22dd08ae1b0 100644 --- a/docs/reference/elasticsearch/mapping-reference/rank-vectors.md +++ b/docs/reference/elasticsearch/mapping-reference/rank-vectors.md @@ -108,11 +108,81 @@ $$$rank-vectors-element-type$$$ `dims` : (Optional, integer) Number of vector dimensions. Can’t exceed `4096`. 
If `dims` is not specified, it will be set to the length of the first vector added to the field. +## Accessing `rank_vectors` fields in search responses +```{applies_to} +stack: ga 9.2 +serverless: ga +``` + +By default, `rank_vectors` fields are **not included in `_source`** in responses from the `_search`, `_msearch`, `_get`, and `_mget` APIs. +This helps reduce response size and improve performance, especially in scenarios where vectors are used solely for similarity scoring and not required in the output. + +To retrieve vector values explicitly, you can use: + +* The `fields` option to request specific vector fields directly: + +```console +POST my-index-2/_search +{ + "fields": ["my_vector"] +} +``` + +- The `_source.exclude_vectors` flag to re-enable vector inclusion in `_source` responses: + +```console +POST my-index-2/_search +{ + "_source": { + "exclude_vectors": false + } +} +``` + +### Storage behavior and `_source` + +By default, `rank_vectors` fields are not stored in `_source` on disk. This is also controlled by the index setting `index.mapping.exclude_source_vectors`. +This setting is enabled by default for newly created indices and can only be set at index creation time. + +When enabled: + +* `rank_vectors` fields are removed from `_source` and the rest of the `_source` is stored as usual. +* If a request includes `_source` and vector values are needed (e.g., during recovery or reindex), the vectors are rehydrated from their internal format. + +This setting is compatible with synthetic `_source`, where the entire `_source` document is reconstructed from columnar storage. In full synthetic mode, no `_source` is stored on disk, and all fields — including vectors — are rebuilt when needed. + +### Rehydration and precision + +When vector values are rehydrated (e.g., for reindex, recovery, or explicit `_source` requests), they are restored from their internal format. 
Internally, vectors are stored at float precision, so if they were originally indexed as higher-precision types (e.g., `double` or `long`), the rehydrated values will have reduced precision. This lossy representation is intended to save space while preserving search quality. + +### Storing original vectors in `_source` + +If you want to preserve the original vector values exactly as they were provided, you can re-enable vector storage in `_source`: + +```console +PUT my-index-include-vectors +{ + "settings": { + "index.mapping.exclude_source_vectors": false + }, + "mappings": { + "properties": { + "my_vector": { + "type": "rank_vectors", + "dims": 128 + } + } + } +} +``` -## Synthetic `_source` [rank-vectors-synthetic-source] +When this setting is disabled: -`rank_vectors` fields support [synthetic `_source`](mapping-source-field.md#synthetic-source) . +* `rank_vectors` fields are stored as part of the `_source`, exactly as indexed. +* The index will store both the original `_source` value and the internal representation used for vector search, resulting in increased storage usage. +* Vectors are once again returned in `_source` by default in all relevant APIs, with no need to use `exclude_vectors` or `fields`. +This configuration is appropriate when full source fidelity is required, such as for auditing or round-tripping exact input values. 
## Scoring with rank vectors [rank-vectors-scoring] diff --git a/docs/reference/elasticsearch/mapping-reference/sparse-vector.md b/docs/reference/elasticsearch/mapping-reference/sparse-vector.md index 3a65ea4fc5ff8..5cb009ddcf302 100644 --- a/docs/reference/elasticsearch/mapping-reference/sparse-vector.md +++ b/docs/reference/elasticsearch/mapping-reference/sparse-vector.md @@ -57,12 +57,6 @@ See [semantic search with ELSER](docs-content://solutions/search/semantic-search The following parameters are accepted by `sparse_vector` fields: -[store](/reference/elasticsearch/mapping-reference/mapping-store.md) -: Indicates whether the field value should be stored and retrievable independently of the [_source](/reference/elasticsearch/mapping-reference/mapping-source-field.md) field. Accepted values: true or false (default). The field’s data is stored using term vectors, a disk-efficient structure compared to the original JSON input. The input map can be retrieved during a search request via the [`fields` parameter](/reference/elasticsearch/rest-apis/retrieve-selected-fields.md#search-fields-param). To benefit from reduced disk usage, you must either: - - * Exclude the field from [_source](/reference/elasticsearch/rest-apis/retrieve-selected-fields.md#source-filtering). - * Use [synthetic `_source`](/reference/elasticsearch/mapping-reference/mapping-source-field.md#synthetic-source). - index_options {applies_to}`stack: ga 9.1` : (Optional, object) You can set index options for your `sparse_vector` field to determine if you should prune tokens, and the parameter configurations for the token pruning. If pruning options are not set in your [`sparse_vector` query](/reference/query-languages/query-dsl/query-dsl-sparse-vector-query.md), Elasticsearch will use the default options configured for the field, if any. @@ -96,6 +90,82 @@ This ensures that: * The tokens that are kept are frequent enough and have significant scoring. 
* Very infrequent tokens that may not have as high of a score are removed. +## Accessing `sparse_vector` fields in search responses +```{applies_to} +stack: ga 9.2 +serverless: ga +``` + +By default, `sparse_vector` fields are **not included in `_source`** in responses from the `_search`, `_msearch`, `_get`, and `_mget` APIs. +This helps reduce response size and improve performance, especially in scenarios where vectors are used solely for similarity scoring and not required in the output. + +To retrieve vector values explicitly, you can use: + +* The `fields` option to request specific vector fields directly: + +```console +POST my-index-2/_search +{ + "fields": ["my_vector"] +} +``` + +- The `_source.exclude_vectors` flag to re-enable vector inclusion in `_source` responses: + +```console +POST my-index-2/_search +{ + "_source": { + "exclude_vectors": false + } +} +``` + +### Storage behavior and `_source` + +By default, `sparse_vector` fields are not stored in `_source` on disk. This is also controlled by the index setting `index.mapping.exclude_source_vectors`. +This setting is enabled by default for newly created indices and can only be set at index creation time. + +When enabled: + +* `sparse_vector` fields are removed from `_source` and the rest of the `_source` is stored as usual. +* If a request includes `_source` and vector values are needed (e.g., during recovery or reindex), the vectors are rehydrated from their internal format. + +This setting is compatible with synthetic `_source`, where the entire `_source` document is reconstructed from columnar storage. In full synthetic mode, no `_source` is stored on disk, and all fields — including vectors — are rebuilt when needed. + +### Rehydration and precision + +When vector values are rehydrated (e.g., for reindex, recovery, or explicit `_source` requests), they are restored from their internal format. 
+Internally, vectors are stored as floats with 9 significant bits for the precision, so the rehydrated values will have reduced precision. +This lossy representation is intended to save space while preserving search quality. + +### Storing original vectors in `_source` + +If you want to preserve the original vector values exactly as they were provided, you can re-enable vector storage in `_source`: + +```console +PUT my-index-include-vectors +{ + "settings": { + "index.mapping.exclude_source_vectors": false + }, + "mappings": { + "properties": { + "my_vector": { + "type": "sparse_vector" + } + } + } +} +``` + +When this setting is disabled: + +* `sparse_vector` fields are stored as part of the `_source`, exactly as indexed. +* The index will store both the original `_source` value and the internal representation used for vector search, resulting in increased storage usage. +* Vectors are once again returned in `_source` by default in all relevant APIs, with no need to use `exclude_vectors` or `fields`. + +This configuration is appropriate when full source fidelity is required, such as for auditing or round-tripping exact input values. 
## Multi-value sparse vectors [index-multi-value-sparse-vectors] diff --git a/modules/reindex/src/test/java/org/elasticsearch/reindex/ReindexBasicTests.java b/modules/reindex/src/test/java/org/elasticsearch/reindex/ReindexBasicTests.java index 96c7ef49f6956..92aa897bf6287 100644 --- a/modules/reindex/src/test/java/org/elasticsearch/reindex/ReindexBasicTests.java +++ b/modules/reindex/src/test/java/org/elasticsearch/reindex/ReindexBasicTests.java @@ -23,7 +23,6 @@ import java.util.Map; import java.util.stream.Collectors; -import static org.elasticsearch.index.IndexSettings.SYNTHETIC_VECTORS; import static org.elasticsearch.index.query.QueryBuilders.termQuery; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount; @@ -182,14 +181,13 @@ public void testReindexFromComplexDateMathIndexName() throws Exception { } public void testReindexIncludeVectors() throws Exception { - assumeTrue("This test requires synthetic vectors to be enabled", SYNTHETIC_VECTORS); var resp1 = prepareCreate("test").setSettings( - Settings.builder().put(IndexSettings.INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING.getKey(), true).build() + Settings.builder().put(IndexSettings.INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING.getKey(), true).build() ).setMapping("foo", "type=dense_vector,similarity=l2_norm", "bar", "type=sparse_vector").get(); assertAcked(resp1); var resp2 = prepareCreate("test_reindex").setSettings( - Settings.builder().put(IndexSettings.INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING.getKey(), true).build() + Settings.builder().put(IndexSettings.INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING.getKey(), true).build() ).setMapping("foo", "type=dense_vector,similarity=l2_norm", "bar", "type=sparse_vector").get(); assertAcked(resp2); @@ -237,5 +235,4 @@ public void testReindexIncludeVectors() throws Exception { searchResponse.decRef(); } } - } diff --git 
a/modules/reindex/src/test/java/org/elasticsearch/reindex/UpdateByQueryBasicTests.java b/modules/reindex/src/test/java/org/elasticsearch/reindex/UpdateByQueryBasicTests.java index 33c80e9138d28..2ff1a258afb1d 100644 --- a/modules/reindex/src/test/java/org/elasticsearch/reindex/UpdateByQueryBasicTests.java +++ b/modules/reindex/src/test/java/org/elasticsearch/reindex/UpdateByQueryBasicTests.java @@ -24,7 +24,6 @@ import java.util.Map; import java.util.stream.Collectors; -import static org.elasticsearch.index.IndexSettings.SYNTHETIC_VECTORS; import static org.elasticsearch.index.query.QueryBuilders.termQuery; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount; @@ -158,9 +157,8 @@ public void testMissingSources() { } public void testUpdateByQueryIncludeVectors() throws Exception { - assumeTrue("This test requires synthetic vectors to be enabled", SYNTHETIC_VECTORS); var resp1 = prepareCreate("test").setSettings( - Settings.builder().put(IndexSettings.INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING.getKey(), true).build() + Settings.builder().put(IndexSettings.INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING.getKey(), true).build() ).setMapping("foo", "type=dense_vector,similarity=l2_norm", "bar", "type=sparse_vector").get(); assertAcked(resp1); diff --git a/rest-api-spec/build.gradle b/rest-api-spec/build.gradle index f2142e8ba1c8d..c11259b202002 100644 --- a/rest-api-spec/build.gradle +++ b/rest-api-spec/build.gradle @@ -90,6 +90,10 @@ tasks.named("yamlRestCompatTestTransform").configure ({ task -> task.skipTest("indices.create/21_synthetic_source_stored/field param - keep root array", "Synthetic source keep arrays now stores leaf arrays natively") task.skipTest("cluster.info/30_info_thread_pool/Cluster HTTP Info", "The search_throttled thread pool has been removed") task.skipTest("synonyms/80_synonyms_from_index/Fail loading synonyms from index if 
synonyms_set doesn't exist", "Synonyms do no longer fail if the synonyms_set doesn't exist") + task.skipTest("get/100_synthetic_source/indexed dense vectors", "Vectors are not returned by default") + task.skipTest("get/100_synthetic_source/non-indexed dense vectors", "Vectors are not returned by default") + task.skipTest("search.vectors/90_sparse_vector/stored sparse_vector synthetic source", "Vectors are not returned by default") + task.skipTest("search.vectors/90_sparse_vector/sparse_vector synthetic source", "Vectors are not returned by default") task.skipTest("update/100_synthetic_source/keyword", "synthetic recovery source means _recovery_source field will not be present") task.skipTest("update/100_synthetic_source/stored text", "synthetic recovery source means _recovery_source field will not be present") }) diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/get/100_synthetic_source.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/get/100_synthetic_source.yml index a0061272a2c23..91a1d1bf9ef40 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/get/100_synthetic_source.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/get/100_synthetic_source.yml @@ -427,6 +427,11 @@ indexed dense vectors: - requires: cluster_features: ["gte_v8.5.0"] reason: introduced in 8.5.0 + test_runner_features: [ capabilities ] + capabilities: + - method: GET + path: /_search + capabilities: [ exclude_source_vectors_setting ] - do: indices.create: @@ -457,6 +462,8 @@ indexed dense vectors: get: index: test id: 1 + _source_exclude_vectors: false + - match: {_index: "test"} - match: {_id: "1"} - match: {_version: 1} @@ -472,6 +479,11 @@ non-indexed dense vectors: - requires: cluster_features: ["gte_v8.5.0"] reason: introduced in 8.5.0 + test_runner_features: [ capabilities ] + capabilities: + - method: GET + path: /_search + capabilities: [ exclude_source_vectors_setting ] - do: indices.create: @@ -501,6 
+513,8 @@ non-indexed dense vectors: get: index: test id: 1 + _source_exclude_vectors: false + - match: {_index: "test"} - match: {_id: "1"} - match: {_version: 1} diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/240_source_synthetic_dense_vectors.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/240_source_synthetic_dense_vectors.yml index 414f6cfdad645..68f8c868b4e7e 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/240_source_synthetic_dense_vectors.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/240_source_synthetic_dense_vectors.yml @@ -5,7 +5,7 @@ setup: capabilities: - method: GET path: /_search - capabilities: [ synthetic_vectors_setting ] + capabilities: [ exclude_source_vectors_setting ] - skip: features: "headers" @@ -13,8 +13,6 @@ setup: indices.create: index: test body: - settings: - index.mapping.synthetic_vectors: true mappings: properties: name: diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/250_source_synthetic_sparse_vectors.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/250_source_synthetic_sparse_vectors.yml index 53f0cd33da7d3..8397b48866204 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/250_source_synthetic_sparse_vectors.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/250_source_synthetic_sparse_vectors.yml @@ -5,7 +5,7 @@ setup: capabilities: - method: GET path: /_search - capabilities: [ synthetic_vectors_setting ] + capabilities: [ exclude_source_vectors_setting ] - skip: features: "headers" @@ -13,8 +13,6 @@ setup: indices.create: index: test body: - settings: - index.mapping.synthetic_vectors: true mappings: properties: name: diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/90_sparse_vector.yml 
b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/90_sparse_vector.yml index 0b65a69bf500e..b521b2866f9c5 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/90_sparse_vector.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/90_sparse_vector.yml @@ -387,8 +387,13 @@ "sparse_vector synthetic source": - requires: - cluster_features: [ "mapper.source.mode_from_index_setting" ] + cluster_features: [ "mapper.source.mode_from_index_setting"] reason: "Source mode configured through index setting" + test_runner_features: [ capabilities, "close_to" ] + capabilities: + - method: GET + path: /_search + capabilities: [ exclude_vectors_param, exclude_source_vectors_setting ] - do: indices.create: @@ -402,6 +407,18 @@ ml.tokens: type: sparse_vector + - do: + indices.create: + index: test_include_vectors + body: + settings: + index: + mapping.exclude_source_vectors: false + mappings: + properties: + ml.tokens: + type: sparse_vector + - match: { acknowledged: true } - do: @@ -421,7 +438,7 @@ - do: index: - index: test + index: test_include_vectors id: "2" body: ml: @@ -431,7 +448,7 @@ - do: index: - index: test + index: test_include_vectors id: "3" body: ml: @@ -446,20 +463,17 @@ get: index: test id: "1" + _source_exclude_vectors: false - - match: - _source: - ml: - tokens: - running: 2.4097164 - good: 2.170997 - run: 2.052153 - race: 1.4575411 - for: 1.1908325 + - close_to: { _source.ml.tokens.running: { value: 2.4097164, error: 0.01 } } + - close_to: { _source.ml.tokens.good: { value: 2.170997, error: 0.01 } } + - close_to: { _source.ml.tokens.run: { value: 2.052153, error: 0.01 } } + - close_to: { _source.ml.tokens.race: { value: 1.4575411, error: 0.01 } } + - close_to: { _source.ml.tokens.for: { value: 1.1908325, error: 0.01 } } - do: get: - index: test + index: test_include_vectors id: "2" - match: @@ -467,7 +481,7 @@ - do: get: - index: test + index: test_include_vectors id: "3" 
- match: @@ -527,8 +541,14 @@ "stored sparse_vector synthetic source": - requires: - cluster_features: [ "mapper.source.mode_from_index_setting", "mapper.sparse_vector.store_support" ] reason: "sparse_vector supports store parameter" + cluster_features: [ "mapper.source.mode_from_index_setting", "mapper.sparse_vector.store_support" ] + test_runner_features: [ capabilities, "close_to" ] + capabilities: + - method: GET + path: /_search + capabilities: [ exclude_vectors_param ] + - do: indices.create: @@ -567,6 +587,8 @@ search: index: test body: + _source: + exclude_vectors: false fields: [ "ml.tokens" ] - match: diff --git a/server/src/internalClusterTest/java/org/elasticsearch/search/query/ExistsIT.java b/server/src/internalClusterTest/java/org/elasticsearch/search/query/ExistsIT.java index 26b040e2309c2..bb48e6c70b4a4 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/search/query/ExistsIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/search/query/ExistsIT.java @@ -88,9 +88,9 @@ public void testExists() throws Exception { // object fields singletonMap("bar", barObject), singletonMap("bar", singletonMap("baz", 42)), - // sparse_vector field empty - singletonMap("vec", emptyMap()), - // sparse_vector field non-empty + // sparse_vector field + singletonMap("vec", singletonMap("6", 100)), + // sparse_vector field singletonMap("vec", singletonMap("1", 100)), // empty doc emptyMap() }; diff --git a/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java b/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java index 9f4c5b80ccf23..93ddb5d3fc485 100644 --- a/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java +++ b/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java @@ -49,8 +49,6 @@ import java.util.Map; import java.util.Set; -import static org.elasticsearch.index.IndexSettings.SYNTHETIC_VECTORS; - /** * Encapsulates all valid index 
level settings. * @see Property#IndexScope @@ -243,9 +241,7 @@ public final class IndexScopedSettings extends AbstractScopedSettings { if (IndexSettings.DOC_VALUES_SKIPPER) { settings.add(IndexSettings.USE_DOC_VALUES_SKIPPER); } - if (SYNTHETIC_VECTORS) { - settings.add(IndexSettings.INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING); - } + settings.add(IndexSettings.INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING); BUILT_IN_INDEX_SETTINGS = Collections.unmodifiableSet(settings); }; diff --git a/server/src/main/java/org/elasticsearch/index/IndexSettings.java b/server/src/main/java/org/elasticsearch/index/IndexSettings.java index cd78d4323f44b..a6335ca6666b0 100644 --- a/server/src/main/java/org/elasticsearch/index/IndexSettings.java +++ b/server/src/main/java/org/elasticsearch/index/IndexSettings.java @@ -848,12 +848,12 @@ private static String getIgnoreAboveDefaultValue(final Settings settings) { Property.Final ); - public static final boolean SYNTHETIC_VECTORS = new FeatureFlag("mapping_synthetic_vectors").isEnabled(); - public static final Setting INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING = Setting.boolSetting( - "index.mapping.synthetic_vectors", - false, + public static final Setting INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING = Setting.boolSetting( + "index.mapping.exclude_source_vectors", + settings -> String.valueOf(SETTING_INDEX_VERSION_CREATED.get(settings).onOrAfter(IndexVersions.EXCLUDE_SOURCE_VECTORS_DEFAULT)), Property.IndexScope, - Property.Final + Property.Final, + Property.ServerlessPublic ); private final Index index; diff --git a/server/src/main/java/org/elasticsearch/index/IndexVersions.java b/server/src/main/java/org/elasticsearch/index/IndexVersions.java index 57fcc2bc763be..221bc9264b100 100644 --- a/server/src/main/java/org/elasticsearch/index/IndexVersions.java +++ b/server/src/main/java/org/elasticsearch/index/IndexVersions.java @@ -181,6 +181,7 @@ private static Version parseUnchecked(String version) { public static final IndexVersion 
DEFAULT_DENSE_VECTOR_TO_BBQ_HNSW = def(9_032_0_00, Version.LUCENE_10_2_2); public static final IndexVersion MATCH_ONLY_TEXT_STORED_AS_BYTES = def(9_033_0_00, Version.LUCENE_10_2_2); public static final IndexVersion IGNORED_SOURCE_FIELDS_PER_ENTRY_WITH_FF = def(9_034_0_00, Version.LUCENE_10_2_2); + public static final IndexVersion EXCLUDE_SOURCE_VECTORS_DEFAULT = def(9_035_0_00, Version.LUCENE_10_2_2); /* * STOP! READ THIS FIRST! No, really, diff --git a/server/src/main/java/org/elasticsearch/index/engine/TranslogOperationAsserter.java b/server/src/main/java/org/elasticsearch/index/engine/TranslogOperationAsserter.java index 2cd3a9f755ffb..0898711aee809 100644 --- a/server/src/main/java/org/elasticsearch/index/engine/TranslogOperationAsserter.java +++ b/server/src/main/java/org/elasticsearch/index/engine/TranslogOperationAsserter.java @@ -41,8 +41,10 @@ public boolean assertSameIndexOperation(Translog.Index o1, Translog.Index o2) th if (engineConfig.getIndexSettings().isRecoverySourceSyntheticEnabled() || engineConfig.getMapperService().mappingLookup().inferenceFields().isEmpty() == false || engineConfig.getMapperService().mappingLookup().syntheticVectorFields().isEmpty() == false) { - return super.assertSameIndexOperation(synthesizeSource(engineConfig, o1), o2) - || super.assertSameIndexOperation(o1, synthesizeSource(engineConfig, o2)); + // for synthetic source and synthetic fields, we check that the resulting source map is equivalent + // since ordering might not be preserved. 
+ return Translog.Index.equalsWithoutAutoGeneratedTimestamp(synthesizeSource(engineConfig, o1), o2, false) + || Translog.Index.equalsWithoutAutoGeneratedTimestamp(o1, synthesizeSource(engineConfig, o2), false); } return false; } @@ -99,6 +101,6 @@ static Translog.Snapshot newSnapshot(EngineConfig engineConfig, Translog.Index o } public boolean assertSameIndexOperation(Translog.Index o1, Translog.Index o2) throws IOException { - return Translog.Index.equalsWithoutAutoGeneratedTimestamp(o1, o2); + return Translog.Index.equalsWithoutAutoGeneratedTimestamp(o1, o2, true); } } diff --git a/server/src/main/java/org/elasticsearch/index/get/ShardGetService.java b/server/src/main/java/org/elasticsearch/index/get/ShardGetService.java index ec0ad1acd917f..6688523bfe668 100644 --- a/server/src/main/java/org/elasticsearch/index/get/ShardGetService.java +++ b/server/src/main/java/org/elasticsearch/index/get/ShardGetService.java @@ -60,7 +60,7 @@ import java.util.function.Function; import java.util.stream.Collectors; -import static org.elasticsearch.index.IndexSettings.INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING; +import static org.elasticsearch.index.IndexSettings.INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING; import static org.elasticsearch.index.seqno.SequenceNumbers.UNASSIGNED_PRIMARY_TERM; import static org.elasticsearch.index.seqno.SequenceNumbers.UNASSIGNED_SEQ_NO; @@ -418,7 +418,7 @@ private GetResult innerGetFetch( */ public static boolean shouldExcludeVectorsFromSource(IndexSettings indexSettings, FetchSourceContext fetchSourceContext) { if (fetchSourceContext == null || fetchSourceContext.excludeVectors() == null) { - return INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING.get(indexSettings.getSettings()); + return INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING.get(indexSettings.getSettings()); } return fetchSourceContext.excludeVectors(); } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java 
b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java index 1a248f2dd501e..87017a24765dc 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java @@ -794,7 +794,7 @@ private static void postProcessDynamicArrayMapping(DocumentParserContext context DenseVectorFieldMapper.Builder builder = new DenseVectorFieldMapper.Builder( fieldName, context.indexSettings().getIndexVersionCreated(), - IndexSettings.INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING.get(context.indexSettings().getSettings()) + IndexSettings.INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING.get(context.indexSettings().getSettings()) ); builder.dimensions(mappers.size()); DenseVectorFieldMapper denseVectorFieldMapper = builder.build(builderContext); diff --git a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java index cde64f54c80d5..9c5d28a4942d3 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java @@ -117,7 +117,7 @@ import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_INDEX_VERSION_CREATED; import static org.elasticsearch.common.Strings.format; import static org.elasticsearch.common.xcontent.XContentParserUtils.ensureExpectedToken; -import static org.elasticsearch.index.IndexSettings.INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING; +import static org.elasticsearch.index.IndexSettings.INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING; import static org.elasticsearch.index.codec.vectors.IVFVectorsFormat.MAX_VECTORS_PER_CLUSTER; import static org.elasticsearch.index.codec.vectors.IVFVectorsFormat.MIN_VECTORS_PER_CLUSTER; @@ -255,9 +255,9 @@ public static class Builder extends FieldMapper.Builder { private final 
Parameter> meta = Parameter.metaParam(); final IndexVersion indexVersionCreated; - final boolean isSyntheticVector; + final boolean isExcludeSourceVectors; - public Builder(String name, IndexVersion indexVersionCreated, boolean isSyntheticVector) { + public Builder(String name, IndexVersion indexVersionCreated, boolean isExcludeSourceVectors) { super(name); this.indexVersionCreated = indexVersionCreated; // This is defined as updatable because it can be updated once, from [null] to a valid dim size, @@ -289,7 +289,7 @@ public Builder(String name, IndexVersion indexVersionCreated, boolean isSyntheti } } }); - this.isSyntheticVector = isSyntheticVector; + this.isExcludeSourceVectors = isExcludeSourceVectors; final boolean indexedByDefault = indexVersionCreated.onOrAfter(INDEXED_BY_DEFAULT_INDEX_VERSION); final boolean defaultInt8Hnsw = indexVersionCreated.onOrAfter(IndexVersions.DEFAULT_DENSE_VECTOR_TO_INT8_HNSW); final boolean defaultBBQ8Hnsw = indexVersionCreated.onOrAfter(IndexVersions.DEFAULT_DENSE_VECTOR_TO_BBQ_HNSW); @@ -431,7 +431,7 @@ public DenseVectorFieldMapper build(MapperBuilderContext context) { // Validate again here because the dimensions or element type could have been set programmatically, // which affects index option validity validate(); - boolean isSyntheticVectorFinal = (context.isSourceSynthetic() == false) && indexed.getValue() && isSyntheticVector; + boolean isExcludeSourceVectorsFinal = context.isSourceSynthetic() == false && indexed.getValue() && isExcludeSourceVectors; return new DenseVectorFieldMapper( leafName(), new DenseVectorFieldType( @@ -448,7 +448,7 @@ public DenseVectorFieldMapper build(MapperBuilderContext context) { builderParams(this, context), indexOptions.getValue(), indexVersionCreated, - isSyntheticVectorFinal + isExcludeSourceVectorsFinal ); } } @@ -2391,7 +2391,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws (n, c) -> new Builder( n, c.getIndexSettings().getIndexVersionCreated(), - 
INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING.get(c.getIndexSettings().getSettings()) + INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING.get(c.getIndexSettings().getSettings()) ), notInMultiFields(CONTENT_TYPE) ); @@ -2850,7 +2850,7 @@ public List fetchValues(Source source, int doc, List ignoredValu private final DenseVectorIndexOptions indexOptions; private final IndexVersion indexCreatedVersion; - private final boolean isSyntheticVector; + private final boolean isExcludeSourceVectors; private DenseVectorFieldMapper( String simpleName, @@ -2858,12 +2858,12 @@ private DenseVectorFieldMapper( BuilderParams params, DenseVectorIndexOptions indexOptions, IndexVersion indexCreatedVersion, - boolean isSyntheticVector + boolean isExcludeSourceVectorsFinal ) { super(simpleName, mappedFieldType, params); this.indexOptions = indexOptions; this.indexCreatedVersion = indexCreatedVersion; - this.isSyntheticVector = isSyntheticVector; + this.isExcludeSourceVectors = isExcludeSourceVectorsFinal; } @Override @@ -2985,7 +2985,7 @@ protected String contentType() { @Override public FieldMapper.Builder getMergeBuilder() { - return new Builder(leafName(), indexCreatedVersion, isSyntheticVector).init(this); + return new Builder(leafName(), indexCreatedVersion, isExcludeSourceVectors).init(this); } private static DenseVectorIndexOptions parseIndexOptions(String fieldName, Object propNode, IndexVersion indexVersion) { @@ -3041,7 +3041,7 @@ public String toString() { @Override public SourceLoader.SyntheticVectorsLoader syntheticVectorsLoader() { - if (isSyntheticVector) { + if (isExcludeSourceVectors) { var syntheticField = new IndexedSyntheticFieldLoader(indexCreatedVersion, fieldType().similarity); return new SyntheticVectorsPatchFieldLoader(syntheticField, syntheticField::copyVectorAsList); } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java index 
a91c84405b295..ef76540525898 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java @@ -12,11 +12,8 @@ import org.apache.lucene.document.FeatureField; import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.LeafReader; -import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.PostingsEnum; -import org.apache.lucene.index.TermVectors; import org.apache.lucene.index.TermsEnum; -import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.Query; import org.apache.lucene.util.BytesRef; @@ -42,8 +39,6 @@ import org.elasticsearch.index.query.SearchExecutionContext; import org.elasticsearch.inference.WeightedToken; import org.elasticsearch.inference.WeightedTokensUtils; -import org.elasticsearch.search.fetch.StoredFieldsSpec; -import org.elasticsearch.search.lookup.Source; import org.elasticsearch.xcontent.ConstructingObjectParser; import org.elasticsearch.xcontent.DeprecationHandler; import org.elasticsearch.xcontent.NamedXContentRegistry; @@ -62,7 +57,7 @@ import java.util.Objects; import java.util.stream.Stream; -import static org.elasticsearch.index.IndexSettings.INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING; +import static org.elasticsearch.index.IndexSettings.INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING; import static org.elasticsearch.index.query.AbstractQueryBuilder.DEFAULT_BOOST; import static org.elasticsearch.xcontent.ConstructingObjectParser.optionalConstructorArg; @@ -94,8 +89,7 @@ private static SparseVectorFieldMapper toType(FieldMapper in) { public static class Builder extends FieldMapper.Builder { private final IndexVersion indexVersionCreated; - - private final Parameter stored = Parameter.storeParam(m -> toType(m).fieldType().isStored(), false); + private final Parameter stored; private final 
Parameter> meta = Parameter.metaParam(); private final Parameter indexOptions = new Parameter<>( SPARSE_VECTOR_INDEX_OPTIONS, @@ -107,12 +101,13 @@ public static class Builder extends FieldMapper.Builder { Objects::toString ).acceptsNull().setSerializerCheck(this::indexOptionsSerializerCheck); - private boolean isSyntheticVector; + private final boolean isExcludeSourceVectors; - public Builder(String name, IndexVersion indexVersionCreated, boolean isSyntheticVector) { + public Builder(String name, IndexVersion indexVersionCreated, boolean isExcludeSourceVectors) { super(name); + this.stored = Parameter.boolParam("store", false, m -> toType(m).fieldType().isStored(), () -> isExcludeSourceVectors); this.indexVersionCreated = indexVersionCreated; - this.isSyntheticVector = isSyntheticVector; + this.isExcludeSourceVectors = isExcludeSourceVectors; } public Builder setStored(boolean value) { @@ -132,19 +127,18 @@ public SparseVectorFieldMapper build(MapperBuilderContext context) { builderIndexOptions = SparseVectorIndexOptions.getDefaultIndexOptions(indexVersionCreated); } - final boolean syntheticVectorFinal = context.isSourceSynthetic() == false && isSyntheticVector; - final boolean storedFinal = stored.getValue() || syntheticVectorFinal; + final boolean isExcludeSourceVectorsFinal = isExcludeSourceVectors && context.isSourceSynthetic() == false && stored.get(); return new SparseVectorFieldMapper( leafName(), new SparseVectorFieldType( indexVersionCreated, context.buildFullName(leafName()), - storedFinal, + stored.get(), meta.getValue(), builderIndexOptions ), builderParams(this, context), - syntheticVectorFinal + isExcludeSourceVectorsFinal ); } @@ -206,7 +200,7 @@ private static SparseVectorIndexOptions parseIndexOptions(MappingParserContext c return new Builder( n, c.indexVersionCreated(), - INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING.get(c.getIndexSettings().getSettings()) + INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING.get(c.getIndexSettings().getSettings()) ); 
}, notInMultiFields(CONTENT_TYPE)); @@ -251,9 +245,6 @@ public IndexFieldData.Builder fielddataBuilder(FieldDataContext fieldDataContext @Override public ValueFetcher valueFetcher(SearchExecutionContext context, String format) { - if (isStored()) { - return new SparseVectorValueFetcher(name()); - } return SourceValueFetcher.identity(name(), context, format); } @@ -313,16 +304,17 @@ private static String indexedValueForSearch(Object value) { } } - private final boolean isSyntheticVector; + private final boolean isExcludeSourceVectors; private SparseVectorFieldMapper( String simpleName, MappedFieldType mappedFieldType, BuilderParams builderParams, - boolean isSyntheticVector + boolean isExcludeSourceVectors ) { super(simpleName, mappedFieldType, builderParams); - this.isSyntheticVector = isSyntheticVector; + assert isExcludeSourceVectors == false || fieldType().isStored(); + this.isExcludeSourceVectors = isExcludeSourceVectors; } @Override @@ -335,7 +327,7 @@ protected SyntheticSourceSupport syntheticSourceSupport() { @Override public SourceLoader.SyntheticVectorsLoader syntheticVectorsLoader() { - if (isSyntheticVector) { + if (isExcludeSourceVectors) { var syntheticField = new SparseVectorSyntheticFieldLoader(fullPath(), leafName()); return new SyntheticVectorsPatchFieldLoader(syntheticField, syntheticField::copyAsMap); } @@ -349,7 +341,7 @@ public Map indexAnalyzers() { @Override public FieldMapper.Builder getMergeBuilder() { - return new Builder(leafName(), this.fieldType().indexVersionCreated, this.isSyntheticVector).init(this); + return new Builder(leafName(), this.fieldType().indexVersionCreated, this.isExcludeSourceVectors).init(this); } @Override @@ -433,51 +425,6 @@ private static boolean indexVersionSupportsDefaultPruningConfig(IndexVersion ind || indexVersion.between(SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION_8_X, IndexVersions.UPGRADE_TO_LUCENE_10_0_0)); } - private static class SparseVectorValueFetcher implements ValueFetcher { - private final String 
fieldName; - private TermVectors termVectors; - - private SparseVectorValueFetcher(String fieldName) { - this.fieldName = fieldName; - } - - @Override - public void setNextReader(LeafReaderContext context) { - try { - termVectors = context.reader().termVectors(); - } catch (IOException exc) { - throw new UncheckedIOException(exc); - } - } - - @Override - public List fetchValues(Source source, int doc, List ignoredValues) throws IOException { - if (termVectors == null) { - return List.of(); - } - var terms = termVectors.get(doc, fieldName); - if (terms == null) { - return List.of(); - } - - var termsEnum = terms.iterator(); - PostingsEnum postingsScratch = null; - Map result = new LinkedHashMap<>(); - while (termsEnum.next() != null) { - postingsScratch = termsEnum.postings(postingsScratch); - postingsScratch.nextDoc(); - result.put(termsEnum.term().utf8ToString(), XFeatureField.decodeFeatureValue(postingsScratch.freq())); - assert postingsScratch.nextDoc() == DocIdSetIterator.NO_MORE_DOCS; - } - return List.of(result); - } - - @Override - public StoredFieldsSpec storedFieldsSpec() { - return StoredFieldsSpec.NO_REQUIREMENTS; - } - } - private static class SparseVectorSyntheticFieldLoader implements SourceLoader.SyntheticFieldLoader { private final String fullPath; private final String leafName; diff --git a/server/src/main/java/org/elasticsearch/index/translog/Translog.java b/server/src/main/java/org/elasticsearch/index/translog/Translog.java index b1a203616b120..6e83a684cfa82 100644 --- a/server/src/main/java/org/elasticsearch/index/translog/Translog.java +++ b/server/src/main/java/org/elasticsearch/index/translog/Translog.java @@ -22,6 +22,7 @@ import org.elasticsearch.common.io.stream.Writeable; import org.elasticsearch.common.lucene.uid.Versions; import org.elasticsearch.common.util.BigArrays; +import org.elasticsearch.common.xcontent.XContentHelper; import org.elasticsearch.core.IOUtils; import org.elasticsearch.core.Nullable; import 
org.elasticsearch.core.Releasable; @@ -35,6 +36,8 @@ import org.elasticsearch.index.shard.AbstractIndexShardComponent; import org.elasticsearch.index.shard.IndexShardComponent; import org.elasticsearch.index.shard.ShardId; +import org.elasticsearch.search.lookup.Source; +import org.elasticsearch.xcontent.XContentParserConfiguration; import java.io.Closeable; import java.io.EOFException; @@ -1226,9 +1229,9 @@ public Type opType() { @Override public long estimateSize() { return (2 * id.length()) + source.length() + (routing != null ? 2 * routing.length() : 0) + (4 * Long.BYTES); // timestamp, - // seq_no, - // primary_term, - // and version + // seq_no, + // primary_term, + // and version } public String id() { @@ -1275,7 +1278,7 @@ public boolean equals(Object o) { } Index other = (Index) o; - return autoGeneratedIdTimestamp == other.autoGeneratedIdTimestamp && equalsWithoutAutoGeneratedTimestamp(this, other); + return autoGeneratedIdTimestamp == other.autoGeneratedIdTimestamp && equalsWithoutAutoGeneratedTimestamp(this, other, true); } @Override @@ -1311,15 +1314,43 @@ public long getAutoGeneratedIdTimestamp() { return autoGeneratedIdTimestamp; } - public static boolean equalsWithoutAutoGeneratedTimestamp(Translog.Index o1, Translog.Index o2) { - return o1.version == o2.version - && o1.seqNo == o2.seqNo - && o1.primaryTerm == o2.primaryTerm - && o1.id.equals(o2.id) - && o1.source.equals(o2.source) - && Objects.equals(o1.routing, o2.routing); - } + public static boolean equalsWithoutAutoGeneratedTimestamp(Translog.Index o1, Translog.Index o2, boolean checkSourceBytes) { + if (o1.version != o2.version + || o1.seqNo != o2.seqNo + || o1.primaryTerm != o2.primaryTerm + || o1.id.equals(o2.id) == false + || Objects.equals(o1.routing, o2.routing) == false) { + return false; + } + + if (checkSourceBytes) { + return o1.source.equals(o2.source); + } + var s1 = Source.fromBytes(o1.source); + var s2 = Source.fromBytes(o2.source); + try ( + var actualParser = 
XContentHelper.createParserNotCompressed( + XContentParserConfiguration.EMPTY, + s1.internalSourceRef(), + s1.sourceContentType() + ) + ) { + var actualMap = actualParser.map(); + try ( + var expectedParser = XContentHelper.createParserNotCompressed( + XContentParserConfiguration.EMPTY, + s2.internalSourceRef(), + s2.sourceContentType() + ) + ) { + var expectedMap = expectedParser.map(); + return expectedMap.equals(actualMap); + } + } catch (IOException exc) { + return false; + } + } } public static final class Delete extends Operation { diff --git a/server/src/main/java/org/elasticsearch/rest/action/search/SearchCapabilities.java b/server/src/main/java/org/elasticsearch/rest/action/search/SearchCapabilities.java index 6b44b787d1dbf..82410a76a8c75 100644 --- a/server/src/main/java/org/elasticsearch/rest/action/search/SearchCapabilities.java +++ b/server/src/main/java/org/elasticsearch/rest/action/search/SearchCapabilities.java @@ -12,8 +12,6 @@ import java.util.HashSet; import java.util.Set; -import static org.elasticsearch.index.IndexSettings.SYNTHETIC_VECTORS; - /** * A {@link Set} of "capabilities" supported by the {@link RestSearchAction}. 
*/ @@ -55,10 +53,10 @@ private SearchCapabilities() {} private static final String EXCLUDE_VECTORS_PARAM = "exclude_vectors_param"; private static final String DENSE_VECTOR_UPDATABLE_BBQ = "dense_vector_updatable_bbq"; private static final String FIELD_EXISTS_QUERY_FOR_TEXT_FIELDS_NO_INDEX_OR_DV = "field_exists_query_for_text_fields_no_index_or_dv"; - private static final String SYNTHETIC_VECTORS_SETTING = "synthetic_vectors_setting"; private static final String UPDATE_FIELD_TO_BBQ_DISK = "update_field_to_bbq_disk"; private static final String KNN_FILTER_ON_NESTED_FIELDS_CAPABILITY = "knn_filter_on_nested_fields"; private static final String BUCKET_SCRIPT_PARENT_MULTI_BUCKET_ERROR = "bucket_script_parent_multi_bucket_error"; + private static final String EXCLUDE_SOURCE_VECTORS_SETTING = "exclude_source_vectors_setting"; public static final Set CAPABILITIES; static { @@ -86,9 +84,7 @@ private SearchCapabilities() {} capabilities.add(UPDATE_FIELD_TO_BBQ_DISK); capabilities.add(KNN_FILTER_ON_NESTED_FIELDS_CAPABILITY); capabilities.add(BUCKET_SCRIPT_PARENT_MULTI_BUCKET_ERROR); - if (SYNTHETIC_VECTORS) { - capabilities.add(SYNTHETIC_VECTORS_SETTING); - } + capabilities.add(EXCLUDE_SOURCE_VECTORS_SETTING); CAPABILITIES = Set.copyOf(capabilities); } } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java index 09d1ad47a1083..ad5f5bdc1eedb 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java @@ -2479,22 +2479,7 @@ protected Object generateRandomInputValue(MappedFieldType ft) { DenseVectorFieldType vectorFieldType = (DenseVectorFieldType) ft; return switch (vectorFieldType.getElementType()) { case BYTE -> randomByteArrayOfLength(vectorFieldType.getVectorDimensions()); - case FLOAT 
-> { - float[] floats = new float[vectorFieldType.getVectorDimensions()]; - float magnitude = 0; - for (int i = 0; i < floats.length; i++) { - float f = randomFloat(); - floats[i] = f; - magnitude += f * f; - } - magnitude = (float) Math.sqrt(magnitude); - if (VectorSimilarity.DOT_PRODUCT.equals(vectorFieldType.getSimilarity())) { - for (int i = 0; i < floats.length; i++) { - floats[i] /= magnitude; - } - } - yield floats; - } + case FLOAT -> randomNormalizedVector(vectorFieldType.getVectorDimensions()); case BIT -> randomByteArrayOfLength(vectorFieldType.getVectorDimensions() / 8); }; } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java index 3d8eb1b454a56..45fe7a16048a6 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java @@ -118,7 +118,7 @@ protected void minimalFieldMappingPreviousIndexDefaultsIncluded(XContentBuilder protected void minimalMappingWithExplicitDefaults(XContentBuilder b) throws IOException { b.field("type", "sparse_vector"); - b.field("store", false); + b.field("store", true); b.startObject("meta"); b.endObject(); @@ -421,7 +421,7 @@ public void testStoreIsNotUpdateable() throws IOException { .startObject("properties") .startObject("field") .field("type", "sparse_vector") - .field("store", true) + .field("store", false) .endObject() .endObject() .endObject() @@ -474,23 +474,16 @@ protected boolean allowsNullValues() { @Override protected SyntheticSourceSupport syntheticSourceSupport(boolean syntheticSource) { - boolean withStore = randomBoolean(); return new SyntheticSourceSupport() { @Override public boolean preservesExactSource() { - return withStore == false; + return false; } @Override public SyntheticSourceExample example(int maxValues) { 
var sample = getSampleValueForDocument(); - return new SyntheticSourceExample(sample, sample, b -> { - if (withStore) { - minimalStoreMapping(b); - } else { - minimalMapping(b); - } - }); + return new SyntheticSourceExample(sample, sample, b -> { minimalMapping(b); }); } @Override diff --git a/server/src/test/java/org/elasticsearch/index/mapper/vectors/SyntheticVectorFieldsRecoveryTests.java b/server/src/test/java/org/elasticsearch/index/mapper/vectors/SyntheticVectorFieldsRecoveryTests.java index 138d138b741e5..3718dae1c7d3e 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/vectors/SyntheticVectorFieldsRecoveryTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/vectors/SyntheticVectorFieldsRecoveryTests.java @@ -38,7 +38,6 @@ import java.util.ArrayList; import java.util.List; -import static org.elasticsearch.index.IndexSettings.SYNTHETIC_VECTORS; import static org.hamcrest.Matchers.equalTo; public class SyntheticVectorFieldsRecoveryTests extends EngineTestCase { @@ -69,7 +68,7 @@ protected Settings indexSettings() { builder.put(IndexSettings.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), SourceFieldMapper.Mode.SYNTHETIC.name()); builder.put(IndexSettings.RECOVERY_USE_SYNTHETIC_SOURCE_SETTING.getKey(), useSyntheticRecovery); } - builder.put(IndexSettings.INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING.getKey(), true); + builder.put(IndexSettings.INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING.getKey(), true); return builder.build(); } @@ -113,7 +112,6 @@ protected String defaultMapping() { } public void testSnapshotRecovery() throws IOException { - assumeTrue("feature flag must be enabled for synthetic vectors", SYNTHETIC_VECTORS); List expectedOperations = new ArrayList<>(); int size = randomIntBetween(10, 50); for (int i = 0; i < size; i++) { diff --git a/server/src/test/java/org/elasticsearch/index/mapper/vectors/SyntheticVectorsMapperTestCase.java 
b/server/src/test/java/org/elasticsearch/index/mapper/vectors/SyntheticVectorsMapperTestCase.java index ebb4fe788fea3..f7a23383f4e92 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/vectors/SyntheticVectorsMapperTestCase.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/vectors/SyntheticVectorsMapperTestCase.java @@ -26,12 +26,10 @@ import java.io.IOException; -import static org.elasticsearch.index.IndexSettings.SYNTHETIC_VECTORS; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertToXContentEquivalent; public abstract class SyntheticVectorsMapperTestCase extends MapperTestCase { public void testSyntheticVectorsMinimalValidDocument() throws IOException { - assumeTrue("feature flag must be enabled for synthetic vectors", SYNTHETIC_VECTORS); for (XContentType type : XContentType.values()) { BytesReference source = generateRandomDoc(type, true, true, false, false, false); assertSyntheticVectors(buildVectorMapping(), source, type); @@ -39,7 +37,6 @@ public void testSyntheticVectorsMinimalValidDocument() throws IOException { } public void testSyntheticVectorsFullDocument() throws IOException { - assumeTrue("feature flag must be enabled for synthetic vectors", SYNTHETIC_VECTORS); for (XContentType type : XContentType.values()) { BytesReference source = generateRandomDoc(type, true, true, true, true, false); assertSyntheticVectors(buildVectorMapping(), source, type); @@ -47,7 +44,6 @@ public void testSyntheticVectorsFullDocument() throws IOException { } public void testSyntheticVectorsWithUnmappedFields() throws IOException { - assumeTrue("feature flag must be enabled for synthetic vectors", SYNTHETIC_VECTORS); for (XContentType type : XContentType.values()) { BytesReference source = generateRandomDoc(type, true, true, true, true, true); assertSyntheticVectors(buildVectorMapping(), source, type); @@ -55,7 +51,6 @@ public void testSyntheticVectorsWithUnmappedFields() throws IOException { } public void 
testSyntheticVectorsMissingRootFields() throws IOException { - assumeTrue("feature flag must be enabled for synthetic vectors", SYNTHETIC_VECTORS); for (XContentType type : XContentType.values()) { BytesReference source = generateRandomDoc(type, false, false, false, false, false); assertSyntheticVectors(buildVectorMapping(), source, type); @@ -63,7 +58,6 @@ public void testSyntheticVectorsMissingRootFields() throws IOException { } public void testSyntheticVectorsPartialNestedContent() throws IOException { - assumeTrue("feature flag must be enabled for synthetic vectors", SYNTHETIC_VECTORS); for (XContentType type : XContentType.values()) { BytesReference source = generateRandomDoc(type, true, true, true, false, false); assertSyntheticVectors(buildVectorMapping(), source, type); @@ -71,7 +65,6 @@ public void testSyntheticVectorsPartialNestedContent() throws IOException { } public void testFlatPathDocument() throws IOException { - assumeTrue("feature flag must be enabled for synthetic vectors", SYNTHETIC_VECTORS); for (XContentType type : XContentType.values()) { BytesReference source = generateRandomDocWithFlatPath(type); assertSyntheticVectors(buildVectorMapping(), source, type); @@ -248,7 +241,7 @@ private BytesReference generateRandomDocWithFlatPath(XContentType xContentType) } private void assertSyntheticVectors(String mapping, BytesReference source, XContentType xContentType) throws IOException { - var settings = Settings.builder().put(IndexSettings.INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING.getKey(), true).build(); + var settings = Settings.builder().put(IndexSettings.INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING.getKey(), true).build(); MapperService mapperService = createMapperService(settings, mapping); var parsedDoc = mapperService.documentMapper().parse(new SourceToParse("0", source, xContentType)); try (var directory = newDirectory()) { diff --git a/server/src/test/java/org/elasticsearch/index/shard/ShardGetServiceTests.java 
b/server/src/test/java/org/elasticsearch/index/shard/ShardGetServiceTests.java index 41f70541dbf1b..385f333f0b020 100644 --- a/server/src/test/java/org/elasticsearch/index/shard/ShardGetServiceTests.java +++ b/server/src/test/java/org/elasticsearch/index/shard/ShardGetServiceTests.java @@ -36,7 +36,6 @@ import java.util.Arrays; import java.util.function.LongSupplier; -import static org.elasticsearch.index.IndexSettings.SYNTHETIC_VECTORS; import static org.elasticsearch.index.seqno.SequenceNumbers.UNASSIGNED_PRIMARY_TERM; import static org.elasticsearch.index.seqno.SequenceNumbers.UNASSIGNED_SEQ_NO; import static org.hamcrest.Matchers.equalTo; @@ -138,11 +137,19 @@ public void testGetFromTranslogWithDenseVector() throws IOException { "foo": "foo" } """, Arrays.toString(vector)); - runGetFromTranslogWithOptions(docToIndex, "\"enabled\": true", null, docToIndex, "\"text\"", "foo", "\"dense_vector\"", false); + runGetFromTranslogWithOptions( + docToIndex, + "\"enabled\": true", + Settings.builder().put(IndexSettings.INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING.getKey(), false).build(), + docToIndex, + "\"text\"", + "foo", + "\"dense_vector\"", + false + ); } public void testGetFromTranslogWithSyntheticVector() throws IOException { - assumeTrue("feature flag must be enabled for synthetic vectors", SYNTHETIC_VECTORS); float[] vector = new float[2048]; for (int i = 0; i < vector.length; i++) { vector[i] = randomByte(); @@ -156,7 +163,7 @@ public void testGetFromTranslogWithSyntheticVector() throws IOException { runGetFromTranslogWithOptions( docToIndex, "\"enabled\": true", - Settings.builder().put(IndexSettings.INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING.getKey(), true).build(), + Settings.builder().put(IndexSettings.INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING.getKey(), true).build(), docToIndex, "\"text\"", "foo", diff --git a/x-pack/plugin/build.gradle b/x-pack/plugin/build.gradle index 7057fc41d834c..1e89582ba87e9 100644 --- a/x-pack/plugin/build.gradle +++ 
b/x-pack/plugin/build.gradle @@ -139,6 +139,8 @@ tasks.named("yamlRestCompatTestTransform").configure({ task -> task.skipTest("esql/192_lookup_join_on_aliases/alias-pattern-multiple", "Error message changed") task.skipTest("esql/192_lookup_join_on_aliases/fails when alias or pattern resolves to multiple", "Error message changed") task.skipTest("esql/10_basic/Test wrong LIMIT parameter", "Error message changed") + task.skipTest("ml/sparse_vector_search/Search on a sparse_vector field with dots in the field names", "Vectors are no longer returned by default") + task.skipTest("ml/sparse_vector_search/Search on a nested sparse_vector field with dots in the field names and conflicting child fields", "Vectors are no longer returned by default") task.skipTest("esql/190_lookup_join/lookup-no-key-only-key", "Requires the fix") }) diff --git a/x-pack/plugin/ml/qa/ml-with-security/build.gradle b/x-pack/plugin/ml/qa/ml-with-security/build.gradle index d18f6da13cad2..97407b882651b 100644 --- a/x-pack/plugin/ml/qa/ml-with-security/build.gradle +++ b/x-pack/plugin/ml/qa/ml-with-security/build.gradle @@ -8,7 +8,7 @@ dependencies { // bring in machine learning rest test suite restResources { restApi { - include '_common', 'cluster', 'nodes', 'indices', 'index', 'search', 'get', 'count', 'ingest', 'bulk', 'ml', 'cat' + include '_common', 'capabilities', 'cluster', 'nodes', 'indices', 'index', 'search', 'get', 'count', 'ingest', 'bulk', 'ml', 'cat' } restTests { includeXpack 'ml' diff --git a/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/TextEmbeddingQueryIT.java b/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/TextEmbeddingQueryIT.java index 620819a8898dd..e21fc7f9be1ab 100644 --- a/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/TextEmbeddingQueryIT.java +++ 
b/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/TextEmbeddingQueryIT.java @@ -21,6 +21,7 @@ import static org.hamcrest.Matchers.closeTo; import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.hasSize; /** @@ -100,6 +101,9 @@ public class TextEmbeddingQueryIT extends PyTorchModelRestTestCase { private static final String TOP_LEVEL_KNN_TEMPLATE = """ { + "_source": { + "exclude_vectors": false + }, "knn": { "field": "%s", "k": 5, @@ -114,6 +118,9 @@ public class TextEmbeddingQueryIT extends PyTorchModelRestTestCase { }"""; private static final String TOP_LEVEL_KNN_FILTER_TEMPLATE = """ { + "_source": { + "exclude_vectors": false + }, "knn": { "field": "%s", "k": 5, @@ -129,6 +136,9 @@ public class TextEmbeddingQueryIT extends PyTorchModelRestTestCase { }"""; private static final String TOP_LEVEL_KNN_HYBRID_ALL = """ { + "_source": { + "exclude_vectors": false + }, "knn": { "field": "embedding", "k": 3, @@ -146,6 +156,9 @@ public class TextEmbeddingQueryIT extends PyTorchModelRestTestCase { }"""; private static final String TOP_LEVEL_KNN_HYBRID_MATCH = """ { + "_source": { + "exclude_vectors": false + }, "knn": { "field": "embedding", "k": 3, @@ -163,6 +176,9 @@ public class TextEmbeddingQueryIT extends PyTorchModelRestTestCase { private static final String QUERY_DSL_KNN_TEMPLATE = """ { + "_source": { + "exclude_vectors": false + }, "query": { "knn" : { "field": "%s", @@ -178,6 +194,9 @@ public class TextEmbeddingQueryIT extends PyTorchModelRestTestCase { }"""; private static final String QUERY_DSL_KNN_FILTER_TEMPLATE = """ { + "_source": { + "exclude_vectors": false + }, "query": { "knn" : { "field": "%s", @@ -194,6 +213,9 @@ public class TextEmbeddingQueryIT extends PyTorchModelRestTestCase { }"""; private static final String QUERY_DSL_KNN_HYBRID_ALL = """ { + "_source": { + "exclude_vectors": false + }, "query": { "bool": { 
"should": [ @@ -220,6 +242,9 @@ public class TextEmbeddingQueryIT extends PyTorchModelRestTestCase { }"""; private static final String QUERY_DSL_KNN_HYBRID_MATCH = """ { + "_source": { + "exclude_vectors": false + }, "query": { "bool": { "should": [ @@ -554,7 +579,11 @@ public void testModelWithPrefixStrings() throws IOException { // The top hit should have the search prefix assertEquals(searchPrefix + "my words", sourceText); List foundEmbedding = (List) MapHelper.dig("_source.embedding", topHit); - assertEquals(embeddings.get(0), foundEmbedding); + var expectedEmbeddings = embeddings.get(0); + assertThat(foundEmbedding.size(), equalTo(expectedEmbeddings.size())); + for (int i = 0; i < foundEmbedding.size(); i++) { + assertEquals(expectedEmbeddings.get(i), foundEmbedding.get(i), 0.01f); + } } } diff --git a/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/TextExpansionQueryIT.java b/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/TextExpansionQueryIT.java index f1e8c9a67df44..58108b2c70b38 100644 --- a/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/TextExpansionQueryIT.java +++ b/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/TextExpansionQueryIT.java @@ -276,6 +276,9 @@ protected Response textExpansionSearch(String index, String modelText, String mo request.setJsonEntity(Strings.format(""" { + "_source": { + "exclude_vectors": false + }, "query": { "text_expansion": { "%s": { diff --git a/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/rrf/700_rrf_retriever_search_api_compatibility.yml b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/rrf/700_rrf_retriever_search_api_compatibility.yml index 01d645fbfb4f5..89d4cb74a6210 100644 --- 
a/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/rrf/700_rrf_retriever_search_api_compatibility.yml +++ b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/rrf/700_rrf_retriever_search_api_compatibility.yml @@ -980,19 +980,25 @@ setup: --- "rrf retriever with inner_hits for sub-retriever": + - skip: + features: [ "headers" ] - requires: capabilities: - method: POST path: /_search - capabilities: [ nested_retriever_inner_hits_support ] + capabilities: [ nested_retriever_inner_hits_support, exclude_source_vectors_setting ] test_runner_features: capabilities reason: "Support for propagating nested retrievers' inner hits to the top-level compound retriever is required" - do: + headers: + # Force JSON content type so that we use a parser that interprets the floating-point score as a double + Content-Type: application/json search: - _source: false index: test body: + _source: + exclude_vectors: false retriever: rrf: retrievers: [ @@ -1058,7 +1064,7 @@ setup: - match: { hits.hits.0.inner_hits.nested_data_field.hits.total.value: 1 } - match: { hits.hits.0.inner_hits.nested_data_field.hits.hits.0.fields.nested_inner_hits.0.data.0: foo } - match: { hits.hits.0.inner_hits.nested_vector_field.hits.total.value: 1 } - - match: { hits.hits.0.inner_hits.nested_vector_field.hits.hits.0.fields.nested_inner_hits.0.paragraph_id: [ 1 ] } + - match: { hits.hits.0.inner_hits.nested_vector_field.hits.hits.0.fields.nested_inner_hits.0.paragraph_id: [ 1.0 ] } - match: { hits.hits.1.inner_hits.nested_data_field.hits.total.value: 3 } - match: { hits.hits.1.inner_hits.nested_data_field.hits.hits.0.fields.nested_inner_hits.0.data.0: bar } diff --git a/x-pack/plugin/rank-vectors/src/main/java/org/elasticsearch/xpack/rank/vectors/RankVectorsPlugin.java b/x-pack/plugin/rank-vectors/src/main/java/org/elasticsearch/xpack/rank/vectors/RankVectorsPlugin.java index dd38367125692..cf302a4bebe86 100644 --- 
a/x-pack/plugin/rank-vectors/src/main/java/org/elasticsearch/xpack/rank/vectors/RankVectorsPlugin.java +++ b/x-pack/plugin/rank-vectors/src/main/java/org/elasticsearch/xpack/rank/vectors/RankVectorsPlugin.java @@ -20,7 +20,7 @@ import java.util.Map; -import static org.elasticsearch.index.IndexSettings.INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING; +import static org.elasticsearch.index.IndexSettings.INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING; import static org.elasticsearch.index.mapper.FieldMapper.notInMultiFields; import static org.elasticsearch.xpack.rank.vectors.mapper.RankVectorsFieldMapper.CONTENT_TYPE; @@ -41,7 +41,7 @@ public Map getMappers() { n, c.indexVersionCreated(), getLicenseState(), - INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING.get(c.getIndexSettings().getSettings()) + INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING.get(c.getIndexSettings().getSettings()) ); }, notInMultiFields(CONTENT_TYPE))); } diff --git a/x-pack/plugin/rank-vectors/src/main/java/org/elasticsearch/xpack/rank/vectors/mapper/RankVectorsFieldMapper.java b/x-pack/plugin/rank-vectors/src/main/java/org/elasticsearch/xpack/rank/vectors/mapper/RankVectorsFieldMapper.java index a79fb4f304f6a..e2f314abc553f 100644 --- a/x-pack/plugin/rank-vectors/src/main/java/org/elasticsearch/xpack/rank/vectors/mapper/RankVectorsFieldMapper.java +++ b/x-pack/plugin/rank-vectors/src/main/java/org/elasticsearch/xpack/rank/vectors/mapper/RankVectorsFieldMapper.java @@ -113,13 +113,13 @@ public static class Builder extends FieldMapper.Builder { private final IndexVersion indexCreatedVersion; private final XPackLicenseState licenseState; - private final boolean isSyntheticVector; + private final boolean isExcludeSourceVectors; - public Builder(String name, IndexVersion indexCreatedVersion, XPackLicenseState licenseState, boolean isSyntheticVector) { + public Builder(String name, IndexVersion indexCreatedVersion, XPackLicenseState licenseState, boolean isExcludeSourceVectors) { super(name); 
this.indexCreatedVersion = indexCreatedVersion; this.licenseState = licenseState; - this.isSyntheticVector = isSyntheticVector; + this.isExcludeSourceVectors = isExcludeSourceVectors; } public Builder dimensions(int dimensions) { @@ -141,7 +141,7 @@ public RankVectorsFieldMapper build(MapperBuilderContext context) { // Validate again here because the dimensions or element type could have been set programmatically, // which affects index option validity validate(); - boolean isSyntheticVectorFinal = context.isSourceSynthetic() == false && isSyntheticVector; + boolean isExcludeSourceVectorsFinal = context.isSourceSynthetic() == false && isExcludeSourceVectors; return new RankVectorsFieldMapper( leafName(), new RankVectorsFieldType( @@ -154,7 +154,7 @@ public RankVectorsFieldMapper build(MapperBuilderContext context) { builderParams(this, context), indexCreatedVersion, licenseState, - isSyntheticVectorFinal + isExcludeSourceVectorsFinal ); } } @@ -252,7 +252,7 @@ DenseVectorFieldMapper.ElementType getElementType() { private final IndexVersion indexCreatedVersion; private final XPackLicenseState licenseState; - private final boolean isSyntheticVector; + private final boolean isExcludeSourceVectors; private RankVectorsFieldMapper( String simpleName, @@ -260,12 +260,12 @@ private RankVectorsFieldMapper( BuilderParams params, IndexVersion indexCreatedVersion, XPackLicenseState licenseState, - boolean isSyntheticVector + boolean isExcludeSourceVectors ) { super(simpleName, fieldType, params); this.indexCreatedVersion = indexCreatedVersion; this.licenseState = licenseState; - this.isSyntheticVector = isSyntheticVector; + this.isExcludeSourceVectors = isExcludeSourceVectors; } @Override @@ -396,7 +396,7 @@ protected String contentType() { @Override public FieldMapper.Builder getMergeBuilder() { - return new Builder(leafName(), indexCreatedVersion, licenseState, isSyntheticVector).init(this); + return new Builder(leafName(), indexCreatedVersion, licenseState, 
isExcludeSourceVectors).init(this); } @Override @@ -406,7 +406,7 @@ protected SyntheticSourceSupport syntheticSourceSupport() { @Override public SourceLoader.SyntheticVectorsLoader syntheticVectorsLoader() { - if (isSyntheticVector) { + if (isExcludeSourceVectors) { var syntheticField = new DocValuesSyntheticFieldLoader(); return new SyntheticVectorsPatchFieldLoader(syntheticField, syntheticField::copyVectorsAsList); } diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/ml/sparse_vector_search.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/ml/sparse_vector_search.yml index 408ddd1ec50c6..83f62f4382431 100644 --- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/ml/sparse_vector_search.yml +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/ml/sparse_vector_search.yml @@ -372,6 +372,11 @@ teardown: - requires: cluster_features: [ "gte_v8.16.0" ] reason: dots in field names allowed starting in in 8.16.0 + test_runner_features: [ capabilities, "close_to" ] + capabilities: + - method: GET + path: /_search + capabilities: [ exclude_source_vectors_setting ] - do: indices.create: @@ -409,17 +414,14 @@ teardown: get: index: index-with-sparse-vector2 id: "has-dots" + _source_exclude_vectors: false - - match: - _source: - ml: - tokens: - running: 2.4097164 - good: 2.170997 - run: 2.052153 - race: 1.4575411 - for: 1.1908325 - 5.0k: 2.489943 + - close_to: { _source.ml.tokens.running: { value: 2.4097164, error: 0.01 } } + - close_to: { _source.ml.tokens.good: { value: 2.170997, error: 0.01 } } + - close_to: { _source.ml.tokens.run: { value: 2.052153, error: 0.01 } } + - close_to: { _source.ml.tokens.race: { value: 1.4575411, error: 0.01 } } + - close_to: { _source.ml.tokens.for: { value: 1.1908325, error: 0.01 } } + - close_to: { _source.ml.tokens.5\\.0k: { value: 2.489943, error: 0.01 } } - do: search: @@ -439,6 +441,11 @@ teardown: - requires: cluster_features: [ "gte_v8.16.0" ] reason: dots in field names 
allowed starting in in 8.16.0 + test_runner_features: [ capabilities, "close_to" ] + capabilities: + - method: GET + path: /_search + capabilities: [ exclude_source_vectors_setting ] - do: indices.create: @@ -479,6 +486,7 @@ teardown: get: index: index-with-sparse-vector3 id: "parent-foo" + _source_exclude_vectors: false - match: _source: @@ -491,6 +499,7 @@ teardown: get: index: index-with-sparse-vector3 id: "parent-foo-bar" + _source_exclude_vectors: false - match: _source: diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/rank_vectors/rank_vectors_synthetic_vectors.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/rank_vectors/rank_vectors_synthetic_vectors.yml index c0df9d6a79d38..b39325d5147a8 100644 --- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/rank_vectors/rank_vectors_synthetic_vectors.yml +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/rank_vectors/rank_vectors_synthetic_vectors.yml @@ -6,7 +6,7 @@ setup: capabilities: - method: GET path: /_search - capabilities: [ synthetic_vectors_setting ] + capabilities: [ exclude_source_vectors_setting ] - skip: features: "headers" @@ -14,8 +14,6 @@ setup: indices.create: index: test body: - settings: - index.mapping.synthetic_vectors: true mappings: properties: name: