Skip to content

Commit a2aa5ea

Browse files
committed
Merge branch 'main' of https://github.com/elastic/elasticsearch into rerank-refactoring
2 parents f4adb64 + 9021040 commit a2aa5ea

File tree

20 files changed

+1413
-1148
lines changed

20 files changed

+1413
-1148
lines changed

benchmarks/build.gradle

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ dependencies {
4141
}
4242
api(project(':libs:h3'))
4343
api(project(':modules:aggregations'))
44+
implementation project(':modules:mapper-extras');
4445
api(project(':x-pack:plugin:esql-core'))
4546
api(project(':x-pack:plugin:core'))
4647
api(project(':x-pack:plugin:esql'))

benchmarks/src/main/java/org/elasticsearch/benchmark/index/mapper/MapperServiceFactory.java

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
import org.elasticsearch.index.mapper.ProvidedIdFieldMapper;
3030
import org.elasticsearch.index.similarity.SimilarityService;
3131
import org.elasticsearch.indices.IndicesModule;
32+
import org.elasticsearch.plugins.MapperPlugin;
3233
import org.elasticsearch.script.Script;
3334
import org.elasticsearch.script.ScriptCompiler;
3435
import org.elasticsearch.script.ScriptContext;
@@ -38,11 +39,16 @@
3839
import java.io.IOException;
3940
import java.io.UncheckedIOException;
4041
import java.util.Collections;
42+
import java.util.List;
4143
import java.util.Map;
4244

4345
public class MapperServiceFactory {
4446

4547
public static MapperService create(String mappings) {
48+
return create(mappings, Collections.emptyList());
49+
}
50+
51+
public static MapperService create(String mappings, List<MapperPlugin> mapperPlugins) {
4652
Settings settings = Settings.builder()
4753
.put("index.number_of_replicas", 0)
4854
.put("index.number_of_shards", 1)
@@ -51,7 +57,7 @@ public static MapperService create(String mappings) {
5157
.build();
5258
IndexMetadata meta = IndexMetadata.builder("index").settings(settings).build();
5359
IndexSettings indexSettings = new IndexSettings(meta, settings);
54-
MapperRegistry mapperRegistry = new IndicesModule(Collections.emptyList()).getMapperRegistry();
60+
MapperRegistry mapperRegistry = new IndicesModule(mapperPlugins).getMapperRegistry();
5561

5662
SimilarityService similarityService = new SimilarityService(indexSettings, null, Map.of());
5763
BitsetFilterCache bitsetFilterCache = new BitsetFilterCache(indexSettings, BitsetFilterCache.Listener.NOOP);

benchmarks/src/main/java/org/elasticsearch/benchmark/xcontent/OptimizedTextBenchmark.java

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import org.elasticsearch.common.logging.LogConfigurator;
1616
import org.elasticsearch.index.mapper.MapperService;
1717
import org.elasticsearch.index.mapper.SourceToParse;
18+
import org.elasticsearch.index.mapper.extras.MapperExtrasPlugin;
1819
import org.elasticsearch.xcontent.XContentBuilder;
1920
import org.elasticsearch.xcontent.XContentFactory;
2021
import org.elasticsearch.xcontent.XContentType;
@@ -34,6 +35,7 @@
3435
import org.openjdk.jmh.infra.Blackhole;
3536

3637
import java.io.IOException;
38+
import java.util.List;
3739
import java.util.Random;
3840
import java.util.concurrent.TimeUnit;
3941

@@ -66,7 +68,7 @@ public class OptimizedTextBenchmark {
6668
private SourceToParse[] sources;
6769

6870
private String randomValue(int length) {
69-
final String CHARS = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
71+
final String CHARS = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 ";
7072
Random random = new Random();
7173
StringBuilder builder = new StringBuilder(length);
7274
for (int i = 0; i < length; i++) {
@@ -83,17 +85,17 @@ public void setup() throws IOException {
8385
"dynamic": false,
8486
"properties": {
8587
"field": {
86-
"type": "keyword"
88+
"type": "match_only_text"
8789
}
8890
}
8991
}
9092
}
91-
""");
93+
""", List.of(new MapperExtrasPlugin()));
9294

9395
sources = new SourceToParse[nDocs];
9496
for (int i = 0; i < nDocs; i++) {
9597
XContentBuilder b = XContentFactory.jsonBuilder();
96-
b.startObject().field("field", randomValue(8)).endObject();
98+
b.startObject().field("field", randomValue(512)).endObject();
9799
sources[i] = new SourceToParse(UUIDs.randomBase64UUID(), BytesReference.bytes(b), XContentType.JSON);
98100
}
99101
}

docs/changelog/127636.yaml

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
pr: 127636
2+
summary: Disallow mixed quoted/unquoted patterns in FROM
3+
area: ES|QL
4+
type: breaking
5+
issues:
6+
- 122651
7+
breaking:
8+
title: Disallow mixed quoted/unquoted patterns in FROM
9+
area: ES|QL
10+
details: "Previously, the ES|QL grammar allowed users to individually quote constituent strings in index patterns\
11+
\ such as \"remote_cluster\":\"index_name\". This would allow users to write complex malformed index patterns\
12+
\ that often slip through grammar and the subsequent validation. This could result in runtime errors\
13+
\ that can be misleading. This change simplifies the grammar to early reject such malformed index patterns\
14+
\ at the parsing stage, allowing users to write simpler queries and see more relevant and meaningful\
15+
\ errors."
16+
impact: "Users can write queries with simpler index patterns and see more meaningful and relevant errors."
17+
notable: false

modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
import org.apache.lucene.util.IOFunction;
3232
import org.elasticsearch.common.CheckedIntFunction;
3333
import org.elasticsearch.common.lucene.Lucene;
34+
import org.elasticsearch.common.text.UTF8DecodingReader;
3435
import org.elasticsearch.common.unit.Fuzziness;
3536
import org.elasticsearch.index.IndexVersion;
3637
import org.elasticsearch.index.analysis.IndexAnalyzers;
@@ -364,7 +365,7 @@ public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions,
364365
@Override
365366
public BlockLoader blockLoader(BlockLoaderContext blContext) {
366367
if (textFieldType.isSyntheticSource()) {
367-
return new BlockStoredFieldsReader.BytesFromStringsBlockLoader(storedFieldNameForSyntheticSource());
368+
return new BlockStoredFieldsReader.BytesFromBytesRefsBlockLoader(storedFieldNameForSyntheticSource());
368369
}
369370
SourceValueFetcher fetcher = SourceValueFetcher.toString(blContext.sourcePaths(name()));
370371
// MatchOnlyText never has norms, so we have to use the field names field
@@ -385,7 +386,7 @@ public IndexFieldData.Builder fielddataBuilder(FieldDataContext fieldDataContext
385386
) {
386387
@Override
387388
protected BytesRef storedToBytesRef(Object stored) {
388-
return new BytesRef((String) stored);
389+
return (BytesRef) stored;
389390
}
390391
};
391392
}
@@ -443,18 +444,20 @@ public FieldMapper.Builder getMergeBuilder() {
443444

444445
@Override
445446
protected void parseCreateField(DocumentParserContext context) throws IOException {
446-
final String value = context.parser().textOrNull();
447+
final var value = context.parser().optimizedTextOrNull();
447448

448449
if (value == null) {
449450
return;
450451
}
451452

452-
Field field = new Field(fieldType().name(), value, fieldType);
453+
final var utfBytes = value.bytes();
454+
Field field = new Field(fieldType().name(), new UTF8DecodingReader(utfBytes), fieldType);
453455
context.doc().add(field);
454456
context.addToFieldNames(fieldType().name());
455457

456458
if (storeSource) {
457-
context.doc().add(new StoredField(fieldType().storedFieldNameForSyntheticSource(), value));
459+
final var bytesRef = new BytesRef(utfBytes.bytes(), utfBytes.offset(), utfBytes.length());
460+
context.doc().add(new StoredField(fieldType().storedFieldNameForSyntheticSource(), bytesRef));
458461
}
459462
}
460463

@@ -474,7 +477,7 @@ protected SyntheticSourceSupport syntheticSourceSupport() {
474477
() -> new StringStoredFieldFieldLoader(fieldType().storedFieldNameForSyntheticSource(), fieldType().name(), leafName()) {
475478
@Override
476479
protected void write(XContentBuilder b, Object value) throws IOException {
477-
b.value((String) value);
480+
b.value(((BytesRef) value).utf8ToString());
478481
}
479482
}
480483
);

modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/SourceConfirmedTextQuery.java

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
import org.apache.lucene.search.Weight;
4242
import org.apache.lucene.search.similarities.Similarity;
4343
import org.apache.lucene.search.similarities.Similarity.SimScorer;
44+
import org.apache.lucene.util.BytesRef;
4445
import org.apache.lucene.util.IOFunction;
4546
import org.elasticsearch.common.CheckedIntFunction;
4647
import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery;
@@ -438,7 +439,13 @@ private MemoryIndex getOrCreateMemoryIndex() throws IOException {
438439
if (value == null) {
439440
continue;
440441
}
441-
cacheEntry.memoryIndex.addField(field, value.toString(), indexAnalyzer);
442+
String valueStr;
443+
if (value instanceof BytesRef valueRef) {
444+
valueStr = valueRef.utf8ToString();
445+
} else {
446+
valueStr = value.toString();
447+
}
448+
cacheEntry.memoryIndex.addField(field, valueStr, indexAnalyzer);
442449
}
443450
}
444451
return cacheEntry.memoryIndex;

modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapperTests.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,12 @@ public void testDefaults() throws IOException {
123123
ParsedDocument doc = mapper.parse(source(b -> b.field("field", "1234")));
124124
List<IndexableField> fields = doc.rootDoc().getFields("field");
125125
assertEquals(1, fields.size());
126-
assertEquals("1234", fields.get(0).stringValue());
126+
127+
var reader = fields.get(0).readerValue();
128+
char[] buff = new char[20];
129+
assertEquals(4, reader.read(buff));
130+
assertEquals("1234", new String(buff, 0, 4));
131+
127132
IndexableFieldType fieldType = fields.get(0).fieldType();
128133
assertThat(fieldType.omitNorms(), equalTo(true));
129134
assertTrue(fieldType.tokenized());
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.common.text;
11+
12+
import org.elasticsearch.xcontent.XContentString;
13+
14+
import java.io.Reader;
15+
import java.nio.ByteBuffer;
16+
import java.nio.CharBuffer;
17+
import java.nio.charset.CharsetDecoder;
18+
import java.nio.charset.StandardCharsets;
19+
20+
/**
21+
* Reader that decodes UTF-8 formatted bytes into chars.
22+
*/
23+
public final class UTF8DecodingReader extends Reader {
24+
private CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder();
25+
private ByteBuffer bytes;
26+
27+
public UTF8DecodingReader(ByteBuffer bytes) {
28+
this.bytes = bytes;
29+
}
30+
31+
public UTF8DecodingReader(XContentString.UTF8Bytes utf8bytes) {
32+
this.bytes = ByteBuffer.wrap(utf8bytes.bytes(), utf8bytes.offset(), utf8bytes.length());
33+
}
34+
35+
@Override
36+
public int read(char[] cbuf, int off, int len) {
37+
return read(CharBuffer.wrap(cbuf, off, len));
38+
}
39+
40+
@Override
41+
public int read(CharBuffer cbuf) {
42+
if (bytes.hasRemaining() == false) {
43+
return -1;
44+
}
45+
46+
int startPos = cbuf.position();
47+
decoder.decode(bytes, cbuf, true);
48+
return cbuf.position() - startPos;
49+
}
50+
51+
@Override
52+
public void close() {}
53+
}

x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClusterSpecIT.java

Lines changed: 44 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -258,10 +258,12 @@ static CsvSpecReader.CsvTestCase convertToRemoteIndices(CsvSpecReader.CsvTestCas
258258
String[] localIndices = fromStatement.substring("FROM ".length()).split(",");
259259
final String remoteIndices;
260260
if (canUseRemoteIndicesOnly() && randomBoolean()) {
261-
remoteIndices = Arrays.stream(localIndices).map(index -> "*:" + index.trim()).collect(Collectors.joining(","));
261+
remoteIndices = Arrays.stream(localIndices)
262+
.map(index -> unquoteAndRequoteAsRemote(index.trim(), true))
263+
.collect(Collectors.joining(","));
262264
} else {
263265
remoteIndices = Arrays.stream(localIndices)
264-
.map(index -> "*:" + index.trim() + "," + index.trim())
266+
.map(index -> unquoteAndRequoteAsRemote(index.trim(), false))
265267
.collect(Collectors.joining(","));
266268
}
267269
var newFrom = "FROM " + remoteIndices + " " + commands[0].substring(fromStatement.length());
@@ -272,9 +274,13 @@ static CsvSpecReader.CsvTestCase convertToRemoteIndices(CsvSpecReader.CsvTestCas
272274
assert parts.length >= 2 : commands[0];
273275
String[] indices = parts[1].split(",");
274276
if (canUseRemoteIndicesOnly() && randomBoolean()) {
275-
parts[1] = Arrays.stream(indices).map(index -> "*:" + index.trim()).collect(Collectors.joining(","));
277+
parts[1] = Arrays.stream(indices)
278+
.map(index -> unquoteAndRequoteAsRemote(index.trim(), true))
279+
.collect(Collectors.joining(","));
276280
} else {
277-
parts[1] = Arrays.stream(indices).map(index -> "*:" + index.trim() + "," + index.trim()).collect(Collectors.joining(","));
281+
parts[1] = Arrays.stream(indices)
282+
.map(index -> unquoteAndRequoteAsRemote(index.trim(), false))
283+
.collect(Collectors.joining(","));
278284
}
279285
String newNewMetrics = String.join(" ", parts);
280286
testCase.query = newNewMetrics + query.substring(first.length());
@@ -307,6 +313,40 @@ static boolean hasIndexMetadata(String query) {
307313
return false;
308314
}
309315

316+
/**
317+
* Since partial quoting is prohibited, we need to take the index name, unquote it,
318+
* convert it to a remote index, and then requote it. For example, "employees" is unquoted,
319+
* turned into the remote index *:employees, and then requoted to get "*:employees".
320+
* @param index Name of the index.
321+
* @param asRemoteIndexOnly If the return needs to be in the form of "*:idx,idx" or "*:idx".
322+
* @return A remote index pattern that's requoted.
323+
*/
324+
private static String unquoteAndRequoteAsRemote(String index, boolean asRemoteIndexOnly) {
325+
index = index.trim();
326+
327+
int numOfQuotes = 0;
328+
for (; numOfQuotes < index.length(); numOfQuotes++) {
329+
if (index.charAt(numOfQuotes) != '"') {
330+
break;
331+
}
332+
}
333+
334+
String unquoted = unquote(index, numOfQuotes);
335+
if (asRemoteIndexOnly) {
336+
return quote("*:" + unquoted, numOfQuotes);
337+
} else {
338+
return quote("*:" + unquoted + "," + unquoted, numOfQuotes);
339+
}
340+
}
341+
342+
private static String quote(String index, int numOfQuotes) {
343+
return "\"".repeat(numOfQuotes) + index + "\"".repeat(numOfQuotes);
344+
}
345+
346+
private static String unquote(String index, int numOfQuotes) {
347+
return index.substring(numOfQuotes, index.length() - numOfQuotes);
348+
}
349+
310350
@Override
311351
protected boolean enableRoundingDoubleValuesOnAsserting() {
312352
return true;

x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/EsqlActionIT.java

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1072,8 +1072,6 @@ public void testDataStreamPatterns() throws Exception {
10721072
testCases.put("test_ds_patterns*::data,test_ds_patterns*::failures,-test_ds_patterns_2*::data", 19L);
10731073
testCases.put("test_ds_patterns*::data,test_ds_patterns*::failures,-test_ds_patterns_2*::failures", 21L);
10741074

1075-
testCases.put("\"test_ds_patterns_1,test_ds_patterns_2\"::failures", 8L);
1076-
10771075
runDataStreamTest(testCases, new String[] { "test_ds_patterns_1", "test_ds_patterns_2", "test_ds_patterns_3" }, (key, value) -> {
10781076
try (var results = run("from " + key + " | stats count(@timestamp)")) {
10791077
assertEquals(key, 1, getValuesList(results).size());
@@ -1098,7 +1096,7 @@ public void testDataStreamInvalidPatterns() throws Exception {
10981096
// Only one selector separator is allowed per expression
10991097
testCases.put("::::data", "mismatched input '::' expecting {QUOTED_STRING, UNQUOTED_SOURCE}");
11001098
// Suffix case is not supported because there is no component named with the empty string
1101-
testCases.put("index::", "missing {QUOTED_STRING, UNQUOTED_SOURCE} at '|'");
1099+
testCases.put("index::", "missing UNQUOTED_SOURCE at '|'");
11021100

11031101
runDataStreamTest(testCases, new String[] { "test_ds_patterns_1" }, (key, value) -> {
11041102
logger.info(key);

0 commit comments

Comments
 (0)