Skip to content

Commit 23907f7

Browse files
committed
ESQL: Initial support for unmapped fields (elastic#119886)
This PR adds initial support for unmapped fields, using the INSIST clause. For starters, this supports unmapped fields without a cast. Note that the INSIST keyword is potentially a placeholder, as the method of defining an unmapped field might change in the future, e.g., to use a special magic function. As this is currently under development, the actual syntax is INSIST_🐔. First stage of elastic#120072. Specifically, the following features are implemented in this PR: * Support for the INSIST keyword without a cast. In particular, if the field being INSISTed upon is mapped to anything other than KEYWORD, it will result in an InvalidMappedField. There is no support for union type resolution on top of INSIST. Future PRs will handle these conflicts. There is support for multiple parameters, or INSIST on top of INSIST, which is equivalent. * Enforcing that INSIST must always be on top of a FROM or another INSIST. This may change in the future, but handling cases like `FROM foo | EVAL x = 1 | INSIST bar` will not be done in this PR, as it makes handling INSIST too complicated.
1 parent bc7846f commit 23907f7

File tree

53 files changed

+3968
-2340
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

53 files changed

+3968
-2340
lines changed

docs/changelog/119886.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 119886
2+
summary: Initial support for unmapped fields
3+
area: ES|QL
4+
type: feature
5+
issues: []

server/src/main/java/org/elasticsearch/index/mapper/BlockSourceReader.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import java.io.IOException;
2323
import java.util.ArrayList;
2424
import java.util.List;
25+
import java.util.Objects;
2526

2627
/**
2728
* Loads values from {@code _source}. This whole process is very slow and cast-tastic,
@@ -230,7 +231,7 @@ private static class BytesRefs extends BlockSourceReader {
230231

231232
@Override
232233
protected void append(BlockLoader.Builder builder, Object v) {
233-
((BlockLoader.BytesRefBuilder) builder).appendBytesRef(toBytesRef(scratch, (String) v));
234+
((BlockLoader.BytesRefBuilder) builder).appendBytesRef(toBytesRef(scratch, Objects.toString(v)));
234235
}
235236

236237
@Override

x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/EsField.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,12 @@
2727
public class EsField implements Writeable {
2828

2929
private static Map<String, Writeable.Reader<? extends EsField>> readers = Map.ofEntries(
30-
Map.entry("EsField", EsField::new),
3130
Map.entry("DateEsField", DateEsField::new),
31+
Map.entry("EsField", EsField::new),
3232
Map.entry("InvalidMappedField", InvalidMappedField::new),
3333
Map.entry("KeywordEsField", KeywordEsField::new),
3434
Map.entry("MultiTypeEsField", MultiTypeEsField::new),
35+
Map.entry("PotentiallyUnmappedKeywordEsField", PotentiallyUnmappedKeywordEsField::new),
3536
Map.entry("TextEsField", TextEsField::new),
3637
Map.entry("UnsupportedEsField", UnsupportedEsField::new)
3738
);

x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/InvalidMappedField.java

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ public InvalidMappedField(String name, String errorMessage) {
4545
* Constructor supporting union types, used in ES|QL.
4646
*/
4747
public InvalidMappedField(String name, Map<String, Set<String>> typesToIndices) {
48-
this(name, makeErrorMessage(typesToIndices), new TreeMap<>(), typesToIndices);
48+
this(name, makeErrorMessage(typesToIndices, false), new TreeMap<>(), typesToIndices);
4949
}
5050

5151
private InvalidMappedField(String name, String errorMessage, Map<String, EsField> properties, Map<String, Set<String>> typesToIndices) {
@@ -107,12 +107,21 @@ public Map<String, Set<String>> getTypesToIndices() {
107107
return typesToIndices;
108108
}
109109

110-
private static String makeErrorMessage(Map<String, Set<String>> typesToIndices) {
110+
public static String makeErrorsMessageIncludingInsistKeyword(Map<String, Set<String>> typesToIndices) {
111+
return makeErrorMessage(typesToIndices, true);
112+
}
113+
114+
private static String makeErrorMessage(Map<String, Set<String>> typesToIndices, boolean includeInsistKeyword) {
111115
StringBuilder errorMessage = new StringBuilder();
116+
var isInsistKeywordOnlyKeyword = includeInsistKeyword && typesToIndices.containsKey(DataType.KEYWORD.typeName()) == false;
112117
errorMessage.append("mapped as [");
113-
errorMessage.append(typesToIndices.size());
118+
errorMessage.append(typesToIndices.size() + (isInsistKeywordOnlyKeyword ? 1 : 0));
114119
errorMessage.append("] incompatible types: ");
115120
boolean first = true;
121+
if (isInsistKeywordOnlyKeyword) {
122+
first = false;
123+
errorMessage.append("[keyword] enforced by INSIST command");
124+
}
116125
for (Map.Entry<String, Set<String>> e : typesToIndices.entrySet()) {
117126
if (first) {
118127
first = false;
@@ -121,7 +130,12 @@ private static String makeErrorMessage(Map<String, Set<String>> typesToIndices)
121130
}
122131
errorMessage.append("[");
123132
errorMessage.append(e.getKey());
124-
errorMessage.append("] in ");
133+
errorMessage.append("] ");
134+
if (e.getKey().equals(DataType.KEYWORD.typeName()) && includeInsistKeyword) {
135+
errorMessage.append("enforced by INSIST command and in ");
136+
} else {
137+
errorMessage.append("in ");
138+
}
125139
if (e.getValue().size() <= 3) {
126140
errorMessage.append(e.getValue());
127141
} else {
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0; you may not use this file except in compliance with the Elastic License
5+
* 2.0.
6+
*/
7+
package org.elasticsearch.xpack.esql.core.type;
8+
9+
import org.elasticsearch.common.io.stream.StreamInput;
10+
11+
import java.io.IOException;
12+
13+
/**
14+
* This class is used as a marker for fields that may be unmapped, where an unmapped field is a field which exists in the _source but is not
15+
* mapped in the index. Note that this field may be mapped for some indices, but is unmapped in at least one of them.
16+
* For indices where the field is unmapped, we will try to load it directly from _source.
17+
*/
18+
public class PotentiallyUnmappedKeywordEsField extends KeywordEsField {
19+
public PotentiallyUnmappedKeywordEsField(String name) {
20+
super(name);
21+
}
22+
23+
public PotentiallyUnmappedKeywordEsField(StreamInput in) throws IOException {
24+
super(in);
25+
}
26+
27+
public String getWriteableName() {
28+
return "PotentiallyUnmappedKeywordEsField";
29+
}
30+
}

x-pack/plugin/esql/qa/server/mixed-cluster/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/mixed/MixedClusterEsqlSpecIT.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,11 @@ protected boolean supportsIndexModeLookup() throws IOException {
8585
return hasCapabilities(List.of(JOIN_LOOKUP_V12.capabilityName()));
8686
}
8787

88+
@Override
89+
protected boolean supportsSourceFieldMapping() {
90+
return false;
91+
}
92+
8893
@Override
8994
protected boolean deduplicateExactWarnings() {
9095
/*

x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClusterSpecIT.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@
5252
import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.JOIN_LOOKUP_V12;
5353
import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.JOIN_PLANNING_V1;
5454
import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.METADATA_FIELDS_REMOTE_TEST;
55+
import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.UNMAPPED_FIELDS;
5556
import static org.elasticsearch.xpack.esql.qa.rest.EsqlSpecTestCase.Mode.SYNC;
5657
import static org.mockito.ArgumentMatchers.any;
5758
import static org.mockito.Mockito.doAnswer;
@@ -127,6 +128,8 @@ protected void shouldSkipTest(String testName) throws IOException {
127128
assumeFalse("INLINESTATS not yet supported in CCS", testCase.requiredCapabilities.contains(JOIN_PLANNING_V1.capabilityName()));
128129
assumeFalse("INLINESTATS not yet supported in CCS", testCase.requiredCapabilities.contains(INLINESTATS_V3.capabilityName()));
129130
assumeFalse("LOOKUP JOIN not yet supported in CCS", testCase.requiredCapabilities.contains(JOIN_LOOKUP_V12.capabilityName()));
131+
// Unmapped fields require a correct capability response from every cluster, which isn't currently implemented.
132+
assumeFalse("UNMAPPED FIELDS not yet supported in CCS", testCase.requiredCapabilities.contains(UNMAPPED_FIELDS.capabilityName()));
130133
}
131134

132135
private TestFeatureService remoteFeaturesService() throws IOException {
@@ -289,4 +292,9 @@ protected boolean supportsIndexModeLookup() throws IOException {
289292
// return hasCapabilities(List.of(JOIN_LOOKUP_V10.capabilityName()));
290293
return false;
291294
}
295+
296+
@Override
297+
protected boolean supportsSourceFieldMapping() {
298+
return false;
299+
}
292300
}

x-pack/plugin/esql/qa/server/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/multi_node/EsqlSpecIT.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,4 +37,9 @@ public EsqlSpecIT(
3737
protected boolean enableRoundingDoubleValuesOnAsserting() {
3838
return true;
3939
}
40+
41+
@Override
42+
protected boolean supportsSourceFieldMapping() {
43+
return false;
44+
}
4045
}

x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/EsqlSpecIT.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,4 +42,5 @@ protected boolean enableRoundingDoubleValuesOnAsserting() {
4242
// This suite runs with more than one node and three shards in serverless
4343
return cluster.getNumNodes() > 1;
4444
}
45+
4546
}

x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/EsqlSpecTestCase.java

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
import org.elasticsearch.xpack.esql.AssertWarnings;
3030
import org.elasticsearch.xpack.esql.CsvSpecReader.CsvTestCase;
3131
import org.elasticsearch.xpack.esql.CsvTestUtils;
32+
import org.elasticsearch.xpack.esql.CsvTestUtils.ExpectedResults;
3233
import org.elasticsearch.xpack.esql.EsqlTestUtils;
3334
import org.elasticsearch.xpack.esql.SpecReader;
3435
import org.elasticsearch.xpack.esql.plugin.EsqlFeatures;
@@ -61,7 +62,6 @@
6162
import static org.elasticsearch.xpack.esql.CsvAssert.assertData;
6263
import static org.elasticsearch.xpack.esql.CsvAssert.assertMetadata;
6364
import static org.elasticsearch.xpack.esql.CsvSpecReader.specParser;
64-
import static org.elasticsearch.xpack.esql.CsvTestUtils.ExpectedResults;
6565
import static org.elasticsearch.xpack.esql.CsvTestUtils.isEnabled;
6666
import static org.elasticsearch.xpack.esql.CsvTestUtils.loadCsvSpecValues;
6767
import static org.elasticsearch.xpack.esql.CsvTestsDataLoader.availableDatasetsForEs;
@@ -70,6 +70,7 @@
7070
import static org.elasticsearch.xpack.esql.CsvTestsDataLoader.deleteInferenceEndpoint;
7171
import static org.elasticsearch.xpack.esql.CsvTestsDataLoader.loadDataSetIntoEs;
7272
import static org.elasticsearch.xpack.esql.EsqlTestUtils.classpathResources;
73+
import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.SOURCE_FIELD_MAPPING;
7374

7475
// This test can run very long in serverless configurations
7576
@TimeoutSuite(millis = 30 * TimeUnits.MINUTE)
@@ -132,8 +133,10 @@ public void setup() throws IOException {
132133
createInferenceEndpoint(client());
133134
}
134135

135-
if (indexExists(availableDatasetsForEs(client(), supportsIndexModeLookup()).iterator().next().indexName()) == false) {
136-
loadDataSetIntoEs(client(), supportsIndexModeLookup());
136+
boolean supportsLookup = supportsIndexModeLookup();
137+
boolean supportsSourceMapping = supportsSourceFieldMapping();
138+
if (indexExists(availableDatasetsForEs(client(), supportsLookup, supportsSourceMapping).iterator().next().indexName()) == false) {
139+
loadDataSetIntoEs(client(), supportsLookup, supportsSourceMapping);
137140
}
138141
}
139142

@@ -172,6 +175,9 @@ protected void shouldSkipTest(String testName) throws IOException {
172175
}
173176
checkCapabilities(adminClient(), testFeatureService, testName, testCase);
174177
assumeTrue("Test " + testName + " is not enabled", isEnabled(testName, instructions, Version.CURRENT));
178+
if (supportsSourceFieldMapping() == false) {
179+
assumeFalse("source mapping tests are muted", testCase.requiredCapabilities.contains(SOURCE_FIELD_MAPPING.capabilityName()));
180+
}
175181
}
176182

177183
protected static void checkCapabilities(RestClient client, TestFeatureService testFeatureService, String testName, CsvTestCase testCase)
@@ -229,6 +235,10 @@ protected boolean supportsIndexModeLookup() throws IOException {
229235
return true;
230236
}
231237

238+
protected boolean supportsSourceFieldMapping() throws IOException {
239+
return true;
240+
}
241+
232242
protected final void doTest() throws Throwable {
233243
RequestObjectBuilder builder = new RequestObjectBuilder(randomFrom(XContentType.values()));
234244

0 commit comments

Comments
 (0)