Skip to content

Commit 64d7580

Browse files
authored
Merge branch 'main' into data-stream-settings-test-fix
2 parents a3961d8 + d729fc1 commit 64d7580

File tree

38 files changed

+1614
-1253
lines changed

38 files changed

+1614
-1253
lines changed

benchmarks/build.gradle

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ dependencies {
4141
}
4242
api(project(':libs:h3'))
4343
api(project(':modules:aggregations'))
44+
implementation project(':modules:mapper-extras');
4445
api(project(':x-pack:plugin:esql-core'))
4546
api(project(':x-pack:plugin:core'))
4647
api(project(':x-pack:plugin:esql'))

benchmarks/src/main/java/org/elasticsearch/benchmark/index/mapper/MapperServiceFactory.java

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
import org.elasticsearch.index.mapper.ProvidedIdFieldMapper;
3030
import org.elasticsearch.index.similarity.SimilarityService;
3131
import org.elasticsearch.indices.IndicesModule;
32+
import org.elasticsearch.plugins.MapperPlugin;
3233
import org.elasticsearch.script.Script;
3334
import org.elasticsearch.script.ScriptCompiler;
3435
import org.elasticsearch.script.ScriptContext;
@@ -38,11 +39,16 @@
3839
import java.io.IOException;
3940
import java.io.UncheckedIOException;
4041
import java.util.Collections;
42+
import java.util.List;
4143
import java.util.Map;
4244

4345
public class MapperServiceFactory {
4446

4547
public static MapperService create(String mappings) {
48+
return create(mappings, Collections.emptyList());
49+
}
50+
51+
public static MapperService create(String mappings, List<MapperPlugin> mapperPlugins) {
4652
Settings settings = Settings.builder()
4753
.put("index.number_of_replicas", 0)
4854
.put("index.number_of_shards", 1)
@@ -51,7 +57,7 @@ public static MapperService create(String mappings) {
5157
.build();
5258
IndexMetadata meta = IndexMetadata.builder("index").settings(settings).build();
5359
IndexSettings indexSettings = new IndexSettings(meta, settings);
54-
MapperRegistry mapperRegistry = new IndicesModule(Collections.emptyList()).getMapperRegistry();
60+
MapperRegistry mapperRegistry = new IndicesModule(mapperPlugins).getMapperRegistry();
5561

5662
SimilarityService similarityService = new SimilarityService(indexSettings, null, Map.of());
5763
BitsetFilterCache bitsetFilterCache = new BitsetFilterCache(indexSettings, BitsetFilterCache.Listener.NOOP);

benchmarks/src/main/java/org/elasticsearch/benchmark/xcontent/OptimizedTextBenchmark.java

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import org.elasticsearch.common.logging.LogConfigurator;
1616
import org.elasticsearch.index.mapper.MapperService;
1717
import org.elasticsearch.index.mapper.SourceToParse;
18+
import org.elasticsearch.index.mapper.extras.MapperExtrasPlugin;
1819
import org.elasticsearch.xcontent.XContentBuilder;
1920
import org.elasticsearch.xcontent.XContentFactory;
2021
import org.elasticsearch.xcontent.XContentType;
@@ -34,6 +35,7 @@
3435
import org.openjdk.jmh.infra.Blackhole;
3536

3637
import java.io.IOException;
38+
import java.util.List;
3739
import java.util.Random;
3840
import java.util.concurrent.TimeUnit;
3941

@@ -66,7 +68,7 @@ public class OptimizedTextBenchmark {
6668
private SourceToParse[] sources;
6769

6870
private String randomValue(int length) {
69-
final String CHARS = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
71+
final String CHARS = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 ";
7072
Random random = new Random();
7173
StringBuilder builder = new StringBuilder(length);
7274
for (int i = 0; i < length; i++) {
@@ -83,17 +85,17 @@ public void setup() throws IOException {
8385
"dynamic": false,
8486
"properties": {
8587
"field": {
86-
"type": "keyword"
88+
"type": "match_only_text"
8789
}
8890
}
8991
}
9092
}
91-
""");
93+
""", List.of(new MapperExtrasPlugin()));
9294

9395
sources = new SourceToParse[nDocs];
9496
for (int i = 0; i < nDocs; i++) {
9597
XContentBuilder b = XContentFactory.jsonBuilder();
96-
b.startObject().field("field", randomValue(8)).endObject();
98+
b.startObject().field("field", randomValue(512)).endObject();
9799
sources[i] = new SourceToParse(UUIDs.randomBase64UUID(), BytesReference.bytes(b), XContentType.JSON);
98100
}
99101
}

docs/changelog/127636.yaml

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
pr: 127636
2+
summary: Disallow mixed quoted/unquoted patterns in FROM
3+
area: ES|QL
4+
type: breaking
5+
issues:
6+
- 122651
7+
breaking:
8+
title: Disallow mixed quoted/unquoted patterns in FROM
9+
area: ES|QL
10+
details: "Previously, the ES|QL grammar allowed users to individually quote constituent strings in index patterns\
11+
\ such as \"remote_cluster\":\"index_name\". This would allow users to write complex malformed index patterns\
12+
\ that often slip through the grammar and the subsequent validation. This could result in runtime errors\
13+
\ that can be misleading. This change simplifies the grammar to reject such malformed index patterns early\
14+
\ at the parsing stage, allowing users to write simpler queries and see more relevant and meaningful\
15+
\ errors."
16+
impact: "Users can write queries with simpler index patterns and see more meaningful and relevant errors."
17+
notable: false

docs/reference/elasticsearch/rest-apis/retrieve-selected-fields.md

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ By default, each hit in the search response includes the document [`_source`](/r
1717
You can use both of these methods, though the `fields` option is preferred because it consults both the document data and index mappings. In some instances, you might want to use [other methods](#field-retrieval-methods) of retrieving data.
1818

1919

20-
### The `fields` option [search-fields-param]
20+
## The `fields` option [search-fields-param]
2121

2222
To retrieve specific fields in the search response, use the `fields` parameter. Because it consults the index mappings, the `fields` parameter provides several advantages over referencing the `_source` directly. Specifically, the `fields` parameter:
2323

@@ -33,7 +33,7 @@ Other mapping options are also respected, including [`ignore_above`](/reference/
3333
The `fields` option returns values in the way that matches how {{es}} indexes them. For standard fields, this means that the `fields` option looks in `_source` to find the values, then parses and formats them using the mappings. Selected fields that can’t be found in `_source` are skipped.
3434

3535

36-
#### Retrieve specific fields [search-fields-request]
36+
### Retrieve specific fields [search-fields-request]
3737

3838
The following search request uses the `fields` parameter to retrieve values for the `user.id` field, all fields starting with `http.response.`, and the `@timestamp` field.
3939

@@ -69,7 +69,7 @@ By default, document metadata fields like `_id` or `_index` are not returned whe
6969

7070

7171

72-
#### Response always returns an array [search-fields-response]
72+
### Response always returns an array [search-fields-response]
7373

7474
The `fields` response always returns an array of values for each field, even when there is a single value in the `_source`. This is because {{es}} has no dedicated array type, and any field could contain multiple values. The `fields` parameter also does not guarantee that array values are returned in a specific order. See the mapping documentation on [arrays](/reference/elasticsearch/mapping-reference/array.md) for more background.
7575

@@ -109,7 +109,7 @@ The response includes values as a flat list in the `fields` section for each hit
109109
```
110110

111111

112-
#### Retrieve nested fields [search-fields-nested]
112+
### Retrieve nested fields [search-fields-nested]
113113

114114
::::{dropdown}
115115
The `fields` response for [`nested` fields](/reference/elasticsearch/mapping-reference/nested.md) is slightly different from that of regular object fields. While leaf values inside regular `object` fields are returned as a flat list, values inside `nested` fields are grouped to maintain the independence of each object inside the original nested array. For each entry inside a nested field array, values are again returned as a flat list unless there are other `nested` fields inside the parent nested object, in which case the same procedure is repeated again for the deeper nested fields.
@@ -246,7 +246,7 @@ However, when the `fields` pattern targets the nested `user` field directly, no
246246

247247

248248

249-
#### Retrieve unmapped fields [retrieve-unmapped-fields]
249+
### Retrieve unmapped fields [retrieve-unmapped-fields]
250250

251251
::::{dropdown}
252252
By default, the `fields` parameter returns only values of mapped fields. However, {{es}} allows storing fields in `_source` that are unmapped, such as by setting [dynamic field mapping](docs-content://manage-data/data-store/mapping/dynamic-field-mapping.md) to `false` or by using an object field with `enabled: false`. These options disable parsing and indexing of the object content.
@@ -326,7 +326,7 @@ The response will contain field results under the `session_data.object.*` path,
326326

327327

328328

329-
#### Ignored field values [ignored-field-values]
329+
### Ignored field values [ignored-field-values]
330330

331331
::::{dropdown}
332332
The `fields` section of the response only returns values that were valid when indexed. If your search request asks for values from a field that ignored certain values because they were malformed or too large, these values are returned separately in an `ignored_field_values` section.
@@ -578,6 +578,7 @@ Also only leaf fields can be returned via the `stored_fields` option. If an obje
578578
On its own, `stored_fields` cannot be used to load fields in nested objects — if a field contains a nested object in its path, then no data will be returned for that stored field. To access nested fields, `stored_fields` must be used within an [`inner_hits`](/reference/elasticsearch/rest-apis/retrieve-inner-hits.md) block.
579579
::::
580580

581+
For an example that uses the `stored_fields` parameter, refer to [](retrieve-stored-fields.md).
581582

582583

583584
##### Disable stored fields [disable-stored-fields]
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
---
2+
navigation_title: "Retrieve stored fields"
3+
mapped_pages:
4+
- https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-get.html
5+
applies_to:
6+
stack: all
7+
---
8+
9+
# Retrieve stored fields using the Get document API [get-stored-fields]
10+
11+
Use the `stored_fields` query parameter in a [Get document](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-get) API request to retrieve fields marked as stored (`"store": true`) in the index mapping.
12+
13+
Fields not marked as stored are excluded from the response, even if specified in the request.
14+
15+
::::{tip}
16+
In most cases, the [`fields`](retrieve-selected-fields.md#search-fields-param) and [`_source`](retrieve-selected-fields.md#source-filtering) parameters produce better results than `stored_fields`.
17+
::::
18+
19+
For example, these PUT requests define a stored field in the mapping and add a document:
20+
21+
```console
22+
PUT my-index-000001
23+
{
24+
"mappings": {
25+
"properties": {
26+
"counter": {
27+
"type": "integer",
28+
"store": false
29+
},
30+
"tags": {
31+
"type": "keyword",
32+
"store": true
33+
}
34+
}
35+
}
36+
}
37+
```
38+
39+
```console
40+
PUT my-index-000001/_doc/1
41+
{
42+
"counter": 1,
43+
"tags": [ "production" ]
44+
}
45+
```
46+
47+
% TEST[continued]
48+
49+
This request retrieves the stored fields from the document:
50+
51+
```console
52+
GET my-index-000001/_doc/1?stored_fields=tags,counter
53+
```
54+
55+
% TEST[continued]
56+
57+
The API returns the following response:
58+
59+
```console-result
60+
{
61+
"_index": "my-index-000001",
62+
"_id": "1",
63+
"_version": 1,
64+
"_seq_no": 22,
65+
"_primary_term": 1,
66+
"found": true,
67+
"fields": {
68+
"tags": [
69+
"production"
70+
]
71+
}
72+
}
73+
```
74+
75+
% TESTRESPONSE[s/"_seq_no": \d+/"_seq_no": $body._seq_no/ s/"_primary_term": 1/"_primary_term": $body._primary_term/]
76+
77+
Although the `counter` field is specified in the request, it's not included in the response because it's not actually a stored field.
78+
79+
Field values are returned as an array.
80+
81+
::::{note}
82+
Only leaf fields can be retrieved with the `stored_fields` parameter. If you specify an object field instead, an error is returned.
83+
::::
84+

docs/reference/elasticsearch/toc.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ toc:
9393
- file: rest-apis/reindex-indices.md
9494
- file: rest-apis/retrieve-inner-hits.md
9595
- file: rest-apis/retrieve-selected-fields.md
96+
- file: rest-apis/retrieve-stored-fields.md
9697
- file: rest-apis/retrievers.md
9798
- file: rest-apis/search-multiple-data-streams-indices.md
9899
- file: rest-apis/search-profile.md

modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
import org.apache.lucene.util.IOFunction;
3232
import org.elasticsearch.common.CheckedIntFunction;
3333
import org.elasticsearch.common.lucene.Lucene;
34+
import org.elasticsearch.common.text.UTF8DecodingReader;
3435
import org.elasticsearch.common.unit.Fuzziness;
3536
import org.elasticsearch.index.IndexVersion;
3637
import org.elasticsearch.index.analysis.IndexAnalyzers;
@@ -364,7 +365,7 @@ public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions,
364365
@Override
365366
public BlockLoader blockLoader(BlockLoaderContext blContext) {
366367
if (textFieldType.isSyntheticSource()) {
367-
return new BlockStoredFieldsReader.BytesFromStringsBlockLoader(storedFieldNameForSyntheticSource());
368+
return new BlockStoredFieldsReader.BytesFromBytesRefsBlockLoader(storedFieldNameForSyntheticSource());
368369
}
369370
SourceValueFetcher fetcher = SourceValueFetcher.toString(blContext.sourcePaths(name()));
370371
// MatchOnlyText never has norms, so we have to use the field names field
@@ -385,7 +386,7 @@ public IndexFieldData.Builder fielddataBuilder(FieldDataContext fieldDataContext
385386
) {
386387
@Override
387388
protected BytesRef storedToBytesRef(Object stored) {
388-
return new BytesRef((String) stored);
389+
return (BytesRef) stored;
389390
}
390391
};
391392
}
@@ -443,18 +444,20 @@ public FieldMapper.Builder getMergeBuilder() {
443444

444445
@Override
445446
protected void parseCreateField(DocumentParserContext context) throws IOException {
446-
final String value = context.parser().textOrNull();
447+
final var value = context.parser().optimizedTextOrNull();
447448

448449
if (value == null) {
449450
return;
450451
}
451452

452-
Field field = new Field(fieldType().name(), value, fieldType);
453+
final var utfBytes = value.bytes();
454+
Field field = new Field(fieldType().name(), new UTF8DecodingReader(utfBytes), fieldType);
453455
context.doc().add(field);
454456
context.addToFieldNames(fieldType().name());
455457

456458
if (storeSource) {
457-
context.doc().add(new StoredField(fieldType().storedFieldNameForSyntheticSource(), value));
459+
final var bytesRef = new BytesRef(utfBytes.bytes(), utfBytes.offset(), utfBytes.length());
460+
context.doc().add(new StoredField(fieldType().storedFieldNameForSyntheticSource(), bytesRef));
458461
}
459462
}
460463

@@ -474,7 +477,7 @@ protected SyntheticSourceSupport syntheticSourceSupport() {
474477
() -> new StringStoredFieldFieldLoader(fieldType().storedFieldNameForSyntheticSource(), fieldType().name(), leafName()) {
475478
@Override
476479
protected void write(XContentBuilder b, Object value) throws IOException {
477-
b.value((String) value);
480+
b.value(((BytesRef) value).utf8ToString());
478481
}
479482
}
480483
);

modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/SourceConfirmedTextQuery.java

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
import org.apache.lucene.search.Weight;
4242
import org.apache.lucene.search.similarities.Similarity;
4343
import org.apache.lucene.search.similarities.Similarity.SimScorer;
44+
import org.apache.lucene.util.BytesRef;
4445
import org.apache.lucene.util.IOFunction;
4546
import org.elasticsearch.common.CheckedIntFunction;
4647
import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery;
@@ -438,7 +439,13 @@ private MemoryIndex getOrCreateMemoryIndex() throws IOException {
438439
if (value == null) {
439440
continue;
440441
}
441-
cacheEntry.memoryIndex.addField(field, value.toString(), indexAnalyzer);
442+
String valueStr;
443+
if (value instanceof BytesRef valueRef) {
444+
valueStr = valueRef.utf8ToString();
445+
} else {
446+
valueStr = value.toString();
447+
}
448+
cacheEntry.memoryIndex.addField(field, valueStr, indexAnalyzer);
442449
}
443450
}
444451
return cacheEntry.memoryIndex;

modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapperTests.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,12 @@ public void testDefaults() throws IOException {
123123
ParsedDocument doc = mapper.parse(source(b -> b.field("field", "1234")));
124124
List<IndexableField> fields = doc.rootDoc().getFields("field");
125125
assertEquals(1, fields.size());
126-
assertEquals("1234", fields.get(0).stringValue());
126+
127+
var reader = fields.get(0).readerValue();
128+
char[] buff = new char[20];
129+
assertEquals(4, reader.read(buff));
130+
assertEquals("1234", new String(buff, 0, 4));
131+
127132
IndexableFieldType fieldType = fields.get(0).fieldType();
128133
assertThat(fieldType.omitNorms(), equalTo(true));
129134
assertTrue(fieldType.tokenized());

0 commit comments

Comments
 (0)