Skip to content

Commit 9ed402c

Browse files
Merge branch 'main' into knowledge-base-index-privileges
2 parents 1c1c958 + 906bbfc commit 9ed402c

File tree

82 files changed

+882
-417
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

82 files changed

+882
-417
lines changed

build-tools-internal/src/integTest/groovy/org/elasticsearch/gradle/internal/transport/TransportVersionManagementPluginFuncTest.groovy

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,8 @@ class TransportVersionManagementPluginFuncTest extends AbstractGradleFuncTest {
4646
javaResource("myserver", "transport/definitions/named/" + name + ".csv", ids)
4747
}
4848

49-
def initialTransportVersion(String name, String id) {
50-
javaResource("myserver", "transport/definitions/initial/" + name + ".csv", id)
49+
def unreferencedTransportVersion(String name, String id) {
50+
javaResource("myserver", "transport/definitions/unreferenced/" + name + ".csv", id)
5151
}
5252

5353
def definedAndUsedTransportVersion(String name, String ids) {
@@ -101,7 +101,7 @@ class TransportVersionManagementPluginFuncTest extends AbstractGradleFuncTest {
101101
"""
102102
namedTransportVersion("existing_91", "8012000")
103103
namedTransportVersion("existing_92", "8123000,8012001")
104-
initialTransportVersion("initial_9_0_0", "8000000")
104+
unreferencedTransportVersion("initial_9_0_0", "8000000")
105105
latestTransportVersion("9.2", "existing_92", "8123000")
106106
latestTransportVersion("9.1", "existing_92", "8012001")
107107
// a mock version of TransportVersion, just here so we can compile Dummy.java et al
@@ -303,4 +303,14 @@ class TransportVersionManagementPluginFuncTest extends AbstractGradleFuncTest {
303303
assertDefinitionsFailure(result, "Transport version definition file " +
304304
"[myserver/src/main/resources/transport/definitions/named/patch.csv] has patch version 8015001 as primary id")
305305
}
306+
307+
def "unreferenced directory is optional"() {
308+
given:
309+
file("myserver/src/main/resources/transport/definitions/unreferenced/initial_9_0_0.csv").delete()
310+
file("myserver/src/main/resources/transport/definitions/unreferenced").deleteDir()
311+
when:
312+
def result = gradleRunner(":myserver:validateTransportVersionDefinitions").build()
313+
then:
314+
result.task(":myserver:validateTransportVersionDefinitions").outcome == TaskOutcome.SUCCESS
315+
}
306316
}

build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/transport/ValidateTransportVersionResourcesTask.java

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -107,10 +107,13 @@ public void validateTransportVersions() throws IOException {
107107
// now load all definitions, do some validation and record them by various keys for later quick lookup
108108
// NOTE: this must run after loading referenced names and existing definitions
109109
// NOTE: this is sorted so that the order of cross validation is deterministic
110-
for (String subDir : List.of("initial", "named")) {
111-
try (var definitionsStream = Files.list(definitionsDir.resolve(subDir)).sorted()) {
112-
for (var definitionFile : definitionsStream.toList()) {
113-
recordAndValidateDefinition(readDefinitionFile(definitionFile));
110+
for (String subDirName : List.of("unreferenced", "named")) {
111+
Path subDir = definitionsDir.resolve(subDirName);
112+
if (Files.isDirectory(subDir)) {
113+
try (var definitionsStream = Files.list(subDir).sorted()) {
114+
for (var definitionFile : definitionsStream.toList()) {
115+
recordAndValidateDefinition(readDefinitionFile(definitionFile));
116+
}
114117
}
115118
}
116119
}

docs/changelog/131907.yaml

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
pr: 131907
2+
summary: Enable `exclude_source_vectors` by default for new indices
3+
area: Vector Search
4+
type: breaking
5+
issues: []
6+
breaking:
7+
title: Enable `exclude_source_vectors` by default for new indices
8+
area: Search
9+
details: |-
10+
The `exclude_source_vectors` setting is now enabled by default for newly created indices.
11+
This means that vector fields (e.g., `dense_vector`) are no longer stored in the `_source` field
12+
by default, although they remain fully accessible through search and retrieval operations.
13+
14+
Instead of being persisted in `_source`, vectors are now rehydrated on demand from the underlying
15+
index structures when needed. This reduces index size and improves performance for typical vector
16+
search workloads where the original vector values do not need to be part of the `_source`.
17+
18+
If your use case requires vector fields to be stored in `_source`, you can disable this behavior by
19+
setting `exclude_source_vectors: false` at index creation time.
20+
impact: |-
21+
Vector fields will no longer be stored in `_source` by default for new indices. Applications or tools
22+
that expect to see vector fields in `_source` (for raw document inspection)
23+
may need to be updated or configured to explicitly retain vectors using `exclude_source_vectors: false`.
24+
25+
Retrieval of vector fields via search or the `_source` API remains fully supported.
26+
notable: true

docs/changelog/132766.yaml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
pr: 132766
2+
summary: Change `reporting_user` role to leverage reserved kibana privileges
3+
area: Authorization
4+
type: deprecation
5+
issues: []
6+
deprecation:
7+
title: Deprecate the built-in `reporting_user` role.
8+
area: Authorization
9+
details: The `reporting_user` role is deprecated. Administrators should manage access to Kibana's reporting features via custom roles which grant the necessary privileges.
10+
impact: This role will be removed in a future version. Administrators should migrate to custom roles to avoid interruption.

docs/reference/elasticsearch/mapping-reference/dense-vector.md

Lines changed: 83 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,81 @@ PUT my-index-2
102102

103103
{{es}} uses the [HNSW algorithm](https://arxiv.org/abs/1603.09320) to support efficient kNN search. Like most kNN algorithms, HNSW is an approximate method that sacrifices result accuracy for improved speed.
104104

105+
## Accessing `dense_vector` fields in search responses
106+
```{applies_to}
107+
stack: ga 9.2
108+
serverless: ga
109+
```
110+
111+
By default, `dense_vector` fields are **not included in `_source`** in responses from the `_search`, `_msearch`, `_get`, and `_mget` APIs.
112+
This helps reduce response size and improve performance, especially in scenarios where vectors are used solely for similarity scoring and not required in the output.
113+
114+
To retrieve vector values explicitly, you can use:
115+
116+
* The `fields` option to request specific vector fields directly:
117+
118+
```console
119+
POST my-index-2/_search
120+
{
121+
"fields": ["my_vector"]
122+
}
123+
```
124+
125+
* The `_source.exclude_vectors` flag to re-enable vector inclusion in `_source` responses:
126+
127+
```console
128+
POST my-index-2/_search
129+
{
130+
"_source": {
131+
"exclude_vectors": false
132+
}
133+
}
134+
```
135+
136+
### Storage behavior and `_source`
137+
138+
By default, `dense_vector` fields are **not stored in `_source`** on disk. This is also controlled by the index setting `index.mapping.exclude_source_vectors`.
139+
This setting is enabled by default for newly created indices and can only be set at index creation time.
140+
141+
When enabled:
142+
143+
* `dense_vector` fields are removed from `_source` and the rest of the `_source` is stored as usual.
144+
* If a request includes `_source` and vector values are needed (e.g., during recovery or reindex), the vectors are rehydrated from their internal format.
145+
146+
This setting is compatible with synthetic `_source`, where the entire `_source` document is reconstructed from columnar storage. In full synthetic mode, no `_source` is stored on disk, and all fields — including vectors — are rebuilt when needed.
147+
148+
### Rehydration and precision
149+
150+
When vector values are rehydrated (e.g., for reindex, recovery, or explicit `_source` requests), they are restored from their internal format. Internally, vectors are stored at float precision, so if they were originally indexed as higher-precision types (e.g., `double` or `long`), the rehydrated values will have reduced precision. This lossy representation is intended to save space while preserving search quality.
151+
152+
### Storing original vectors in `_source`
153+
154+
If you want to preserve the original vector values exactly as they were provided, you can re-enable vector storage in `_source`:
155+
156+
```console
157+
PUT my-index-include-vectors
158+
{
159+
"settings": {
160+
"index.mapping.exclude_source_vectors": false
161+
},
162+
"mappings": {
163+
"properties": {
164+
"my_vector": {
165+
"type": "dense_vector"
166+
}
167+
}
168+
}
169+
}
170+
```
171+
172+
When this setting is disabled:
173+
174+
* `dense_vector` fields are stored as part of the `_source`, exactly as indexed.
175+
* The index will store both the original `_source` value and the internal representation used for vector search, resulting in increased storage usage.
176+
* Vectors are once again returned in `_source` by default in all relevant APIs, with no need to use `exclude_vectors` or `fields`.
177+
178+
This configuration is appropriate when full source fidelity is required, such as for auditing or round-tripping exact input values.
179+
105180
## Automatically quantize vectors for kNN search [dense-vector-quantization]
106181

107182
The `dense_vector` type supports quantization to reduce the memory footprint required when [searching](docs-content://solutions/search/vector/knn.md#approximate-knn) `float` vectors. The three following quantization strategies are supported:
@@ -266,16 +341,16 @@ $$$dense-vector-index-options$$$
266341
`type`
267342
: (Required, string) The type of kNN algorithm to use. Can be any of:
268343
* `hnsw` - This utilizes the [HNSW algorithm](https://arxiv.org/abs/1603.09320) for scalable approximate kNN search. This supports all `element_type` values.
269-
* `int8_hnsw` - The default index type for some float vectors:
270-
271-
* {applies_to}`stack: ga 9.1` Default for float vectors with less than 384 dimensions.
344+
* `int8_hnsw` - The default index type for some float vectors:
345+
346+
* {applies_to}`stack: ga 9.1` Default for float vectors with less than 384 dimensions.
272347
* {applies_to}`stack: ga 9.0` Default for all float vectors.
273-
348+
274349
This utilizes the [HNSW algorithm](https://arxiv.org/abs/1603.09320) in addition to automatic scalar quantization for scalable approximate kNN search with `element_type` of `float`. This can reduce the memory footprint by 4x at the cost of some accuracy. See [Automatically quantize vectors for kNN search](#dense-vector-quantization).
275350
* `int4_hnsw` - This utilizes the [HNSW algorithm](https://arxiv.org/abs/1603.09320) in addition to automatic scalar quantization for scalable approximate kNN search with `element_type` of `float`. This can reduce the memory footprint by 8x at the cost of some accuracy. See [Automatically quantize vectors for kNN search](#dense-vector-quantization).
276351
* `bbq_hnsw` - This utilizes the [HNSW algorithm](https://arxiv.org/abs/1603.09320) in addition to automatic binary quantization for scalable approximate kNN search with `element_type` of `float`. This can reduce the memory footprint by 32x at the cost of accuracy. See [Automatically quantize vectors for kNN search](#dense-vector-quantization).
277-
278-
{applies_to}`stack: ga 9.1` `bbq_hnsw` is the default index type for float vectors with greater than or equal to 384 dimensions.
352+
353+
{applies_to}`stack: ga 9.1` `bbq_hnsw` is the default index type for float vectors with greater than or equal to 384 dimensions.
279354
* `flat` - This utilizes a brute-force search algorithm for exact kNN search. This supports all `element_type` values.
280355
* `int8_flat` - This utilizes a brute-force search algorithm in addition to automatic scalar quantization. Only supports `element_type` of `float`.
281356
* `int4_flat` - This utilizes a brute-force search algorithm in addition to automatic half-byte scalar quantization. Only supports `element_type` of `float`.
@@ -295,8 +370,8 @@ $$$dense-vector-index-options$$$
295370
: (Optional, object) An optional section that configures automatic vector rescoring on knn queries for the given field. Only applicable to quantized index types.
296371
:::::{dropdown} Properties of rescore_vector
297372
`oversample`
298-
: (required, float) The amount to oversample the search results by. This value should be one of the following:
299-
* Greater than `1.0` and less than `10.0`
373+
: (required, float) The amount to oversample the search results by. This value should be one of the following:
374+
* Greater than `1.0` and less than `10.0`
300375
* Exactly `0` to indicate no oversampling and rescoring should occur {applies_to}`stack: ga 9.1`
301376
: The higher the value, the more vectors will be gathered and rescored with the raw values per shard.
302377
: In case a knn query specifies a `rescore_vector` parameter, the query `rescore_vector` parameter will be used instead.

docs/reference/elasticsearch/mapping-reference/rank-vectors.md

Lines changed: 72 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -108,11 +108,81 @@ $$$rank-vectors-element-type$$$
108108
`dims`
109109
: (Optional, integer) Number of vector dimensions. Can’t exceed `4096`. If `dims` is not specified, it will be set to the length of the first vector added to the field.
110110

111+
## Accessing `rank_vectors` fields in search responses
112+
```{applies_to}
113+
stack: ga 9.2
114+
serverless: ga
115+
```
116+
117+
By default, `rank_vectors` fields are **not included in `_source`** in responses from the `_search`, `_msearch`, `_get`, and `_mget` APIs.
118+
This helps reduce response size and improve performance, especially in scenarios where vectors are used solely for similarity scoring and not required in the output.
119+
120+
To retrieve vector values explicitly, you can use:
121+
122+
* The `fields` option to request specific vector fields directly:
123+
124+
```console
125+
POST my-index-2/_search
126+
{
127+
"fields": ["my_vector"]
128+
}
129+
```
130+
131+
* The `_source.exclude_vectors` flag to re-enable vector inclusion in `_source` responses:
132+
133+
```console
134+
POST my-index-2/_search
135+
{
136+
"_source": {
137+
"exclude_vectors": false
138+
}
139+
}
140+
```
141+
142+
### Storage behavior and `_source`
143+
144+
By default, `rank_vectors` fields are not stored in `_source` on disk. This is also controlled by the index setting `index.mapping.exclude_source_vectors`.
145+
This setting is enabled by default for newly created indices and can only be set at index creation time.
146+
147+
When enabled:
148+
149+
* `rank_vectors` fields are removed from `_source` and the rest of the `_source` is stored as usual.
150+
* If a request includes `_source` and vector values are needed (e.g., during recovery or reindex), the vectors are rehydrated from their internal format.
151+
152+
This setting is compatible with synthetic `_source`, where the entire `_source` document is reconstructed from columnar storage. In full synthetic mode, no `_source` is stored on disk, and all fields — including vectors — are rebuilt when needed.
153+
154+
### Rehydration and precision
155+
156+
When vector values are rehydrated (e.g., for reindex, recovery, or explicit `_source` requests), they are restored from their internal format. Internally, vectors are stored at float precision, so if they were originally indexed as higher-precision types (e.g., `double` or `long`), the rehydrated values will have reduced precision. This lossy representation is intended to save space while preserving search quality.
157+
158+
### Storing original vectors in `_source`
159+
160+
If you want to preserve the original vector values exactly as they were provided, you can re-enable vector storage in `_source`:
161+
162+
```console
163+
PUT my-index-include-vectors
164+
{
165+
"settings": {
166+
"index.mapping.exclude_source_vectors": false
167+
},
168+
"mappings": {
169+
"properties": {
170+
"my_vector": {
171+
"type": "rank_vectors",
172+
"dims": 128
173+
}
174+
}
175+
}
176+
}
177+
```
111178

112-
## Synthetic `_source` [rank-vectors-synthetic-source]
179+
When this setting is disabled:
113180

114-
`rank_vectors` fields support [synthetic `_source`](mapping-source-field.md#synthetic-source) .
181+
* `rank_vectors` fields are stored as part of the `_source`, exactly as indexed.
182+
* The index will store both the original `_source` value and the internal representation used for vector search, resulting in increased storage usage.
183+
* Vectors are once again returned in `_source` by default in all relevant APIs, with no need to use `exclude_vectors` or `fields`.
115184

185+
This configuration is appropriate when full source fidelity is required, such as for auditing or round-tripping exact input values.
116186

117187
## Scoring with rank vectors [rank-vectors-scoring]
118188

0 commit comments

Comments
 (0)