Skip to content

Commit bd5f43f

Browse files
authored
Build the correct keyword field type for PotentiallyUnmappedKeywordEsField (#129854)
Before a `KeywordFieldType` was created that didn't set the isSyntheticSource field, causing to use the wrong block loader that would synthesize the complete _source instead of just loading values from ignored source. This PR addresses this.
1 parent e729705 commit bd5f43f

File tree

3 files changed

+177
-4
lines changed

3 files changed

+177
-4
lines changed

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,17 @@
77

88
package org.elasticsearch.xpack.esql.planner;
99

10+
import org.apache.lucene.document.FieldType;
11+
import org.apache.lucene.index.DocValuesType;
12+
import org.apache.lucene.index.IndexOptions;
1013
import org.apache.lucene.index.LeafReaderContext;
1114
import org.apache.lucene.index.SortedSetDocValues;
1215
import org.apache.lucene.search.BooleanClause;
1316
import org.apache.lucene.search.BooleanQuery;
1417
import org.apache.lucene.search.IndexSearcher;
1518
import org.apache.lucene.search.Query;
1619
import org.elasticsearch.common.logging.HeaderWarning;
20+
import org.elasticsearch.common.lucene.Lucene;
1721
import org.elasticsearch.compute.aggregation.AggregatorMode;
1822
import org.elasticsearch.compute.aggregation.GroupingAggregator;
1923
import org.elasticsearch.compute.aggregation.blockhash.BlockHash;
@@ -76,7 +80,6 @@
7680
import java.io.IOException;
7781
import java.util.ArrayList;
7882
import java.util.List;
79-
import java.util.Map;
8083
import java.util.Optional;
8184
import java.util.Set;
8285
import java.util.function.Function;
@@ -177,6 +180,13 @@ private BlockLoader getBlockLoaderFor(int shardId, Attribute attr, MappedFieldTy
177180

178181
/** A hack to pretend an unmapped field still exists. */
179182
private static class DefaultShardContextForUnmappedField extends DefaultShardContext {
183+
private static final FieldType UNMAPPED_FIELD_TYPE = new FieldType(KeywordFieldMapper.Defaults.FIELD_TYPE);
184+
static {
185+
UNMAPPED_FIELD_TYPE.setDocValuesType(DocValuesType.NONE);
186+
UNMAPPED_FIELD_TYPE.setIndexOptions(IndexOptions.NONE);
187+
UNMAPPED_FIELD_TYPE.setStored(false);
188+
UNMAPPED_FIELD_TYPE.freeze();
189+
}
180190
private final KeywordEsField unmappedEsField;
181191

182192
DefaultShardContextForUnmappedField(DefaultShardContext ctx, PotentiallyUnmappedKeywordEsField unmappedEsField) {
@@ -187,9 +197,22 @@ private static class DefaultShardContextForUnmappedField extends DefaultShardCon
187197
@Override
188198
public @Nullable MappedFieldType fieldType(String name) {
189199
var superResult = super.fieldType(name);
190-
return superResult == null && name.equals(unmappedEsField.getName())
191-
? new KeywordFieldMapper.KeywordFieldType(name, false /* isIndexed */, false /* hasDocValues */, Map.of() /* meta */)
192-
: superResult;
200+
return superResult == null && name.equals(unmappedEsField.getName()) ? createUnmappedFieldType(name, this) : superResult;
201+
}
202+
203+
static MappedFieldType createUnmappedFieldType(String name, DefaultShardContext context) {
204+
var builder = new KeywordFieldMapper.Builder(name, context.ctx.indexVersionCreated());
205+
builder.docValues(false);
206+
builder.indexed(false);
207+
return new KeywordFieldMapper.KeywordFieldType(
208+
name,
209+
UNMAPPED_FIELD_TYPE,
210+
Lucene.KEYWORD_ANALYZER,
211+
Lucene.KEYWORD_ANALYZER,
212+
Lucene.KEYWORD_ANALYZER,
213+
builder,
214+
context.ctx.isSourceSynthetic()
215+
);
193216
}
194217
}
195218

x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlannerTests.java

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,18 @@
2323
import org.elasticsearch.compute.lucene.DataPartitioning;
2424
import org.elasticsearch.compute.lucene.LuceneSourceOperator;
2525
import org.elasticsearch.compute.lucene.LuceneTopNSourceOperator;
26+
import org.elasticsearch.compute.lucene.ValuesSourceReaderOperator;
2627
import org.elasticsearch.compute.operator.SourceOperator;
2728
import org.elasticsearch.compute.test.TestBlockFactory;
2829
import org.elasticsearch.core.IOUtils;
2930
import org.elasticsearch.core.Releasable;
3031
import org.elasticsearch.core.Releasables;
3132
import org.elasticsearch.index.IndexMode;
3233
import org.elasticsearch.index.cache.query.TrivialQueryCachingPolicy;
34+
import org.elasticsearch.index.mapper.BlockLoader;
35+
import org.elasticsearch.index.mapper.BlockSourceReader;
36+
import org.elasticsearch.index.mapper.FallbackSyntheticSourceBlockLoader;
37+
import org.elasticsearch.index.mapper.MappedFieldType;
3338
import org.elasticsearch.index.mapper.MapperServiceTestCase;
3439
import org.elasticsearch.node.Node;
3540
import org.elasticsearch.plugins.ExtensiblePlugin;
@@ -42,10 +47,12 @@
4247
import org.elasticsearch.xpack.esql.core.tree.Source;
4348
import org.elasticsearch.xpack.esql.core.type.DataType;
4449
import org.elasticsearch.xpack.esql.core.type.EsField;
50+
import org.elasticsearch.xpack.esql.core.type.PotentiallyUnmappedKeywordEsField;
4551
import org.elasticsearch.xpack.esql.core.util.StringUtils;
4652
import org.elasticsearch.xpack.esql.expression.Order;
4753
import org.elasticsearch.xpack.esql.index.EsIndex;
4854
import org.elasticsearch.xpack.esql.plan.physical.EsQueryExec;
55+
import org.elasticsearch.xpack.esql.plan.physical.FieldExtractExec;
4956
import org.elasticsearch.xpack.esql.plan.physical.LimitExec;
5057
import org.elasticsearch.xpack.esql.plan.physical.ParallelExec;
5158
import org.elasticsearch.xpack.esql.plugin.EsqlPlugin;
@@ -64,6 +71,7 @@
6471

6572
import static org.hamcrest.Matchers.equalTo;
6673
import static org.hamcrest.Matchers.hasSize;
74+
import static org.hamcrest.Matchers.instanceOf;
6775
import static org.hamcrest.Matchers.lessThanOrEqualTo;
6876

6977
public class LocalExecutionPlannerTests extends MapperServiceTestCase {
@@ -84,10 +92,17 @@ public static Iterable<Object[]> parameters() throws Exception {
8492

8593
private final ArrayList<Releasable> releasables = new ArrayList<>();
8694

95+
private Settings settings = SETTINGS;
96+
8797
public LocalExecutionPlannerTests(@Name("estimatedRowSizeIsHuge") boolean estimatedRowSizeIsHuge) {
8898
this.estimatedRowSizeIsHuge = estimatedRowSizeIsHuge;
8999
}
90100

101+
@Override
102+
protected Settings getIndexSettings() {
103+
return settings;
104+
}
105+
91106
@Override
92107
protected Collection<Plugin> getPlugins() {
93108
var plugin = new SpatialPlugin();
@@ -229,6 +244,47 @@ public void testParallel() throws Exception {
229244
assertThat(plan.driverFactories, hasSize(2));
230245
}
231246

247+
public void testPlanUnmappedFieldExtractStoredSource() throws Exception {
248+
var blockLoader = constructBlockLoader();
249+
// In case of stored source we expect bytes based block source loader (this loads source from _source)
250+
assertThat(blockLoader, instanceOf(BlockSourceReader.BytesRefsBlockLoader.class));
251+
}
252+
253+
public void testPlanUnmappedFieldExtractSyntheticSource() throws Exception {
254+
// Enables synthetic source, so that fallback synthetic source blocker loader is used:
255+
settings = Settings.builder().put(settings).put("index.mapping.source.mode", "synthetic").build();
256+
257+
var blockLoader = constructBlockLoader();
258+
// In case of synthetic source we expect bytes based block source loader (this loads source from _ignored_source)
259+
assertThat(blockLoader, instanceOf(FallbackSyntheticSourceBlockLoader.class));
260+
}
261+
262+
private BlockLoader constructBlockLoader() throws IOException {
263+
EsQueryExec queryExec = new EsQueryExec(
264+
Source.EMPTY,
265+
index().name(),
266+
IndexMode.STANDARD,
267+
index().indexNameWithModes(),
268+
List.of(new FieldAttribute(Source.EMPTY, EsQueryExec.DOC_ID_FIELD.getName(), EsQueryExec.DOC_ID_FIELD)),
269+
null,
270+
null,
271+
null,
272+
between(1, 1000)
273+
);
274+
FieldExtractExec fieldExtractExec = new FieldExtractExec(
275+
Source.EMPTY,
276+
queryExec,
277+
List.of(
278+
new FieldAttribute(Source.EMPTY, "potentially_unmapped", new PotentiallyUnmappedKeywordEsField("potentially_unmapped"))
279+
),
280+
MappedFieldType.FieldExtractPreference.NONE
281+
);
282+
LocalExecutionPlanner.LocalExecutionPlan plan = planner().plan("test", FoldContext.small(), fieldExtractExec);
283+
var p = plan.driverFactories.get(0).driverSupplier().physicalOperation();
284+
var fieldInfo = ((ValuesSourceReaderOperator.Factory) p.intermediateOperatorFactories.get(0)).fields().get(0);
285+
return fieldInfo.blockLoader().apply(0);
286+
}
287+
232288
private int randomEstimatedRowSize(boolean huge) {
233289
int hugeBoundary = SourceOperator.MIN_TARGET_PAGE_SIZE * 10;
234290
return huge ? between(hugeBoundary, Integer.MAX_VALUE) : between(1, hugeBoundary);
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
---
2+
setup:
3+
- do:
4+
indices.create:
5+
index: my-index
6+
body:
7+
settings:
8+
index:
9+
mode: logsdb
10+
mappings:
11+
dynamic: false
12+
properties:
13+
"@timestamp":
14+
type: date
15+
message:
16+
type: text
17+
18+
- do:
19+
bulk:
20+
index: my-index
21+
refresh: true
22+
body:
23+
- { "index": { } }
24+
- { "@timestamp": "2024-02-12T10:30:00Z", "host.name": "foo", "agent_id": "darth-vader", "process_id": 101, "http_method": "GET", "is_https": false, "location": {"lat" : 40.7128, "lon" : -74.0060}, "message": "No, I am your father." }
25+
- { "index": { } }
26+
- { "@timestamp": "2024-02-12T10:31:00Z", "host.name": "bar", "agent_id": "yoda", "process_id": 102, "http_method": "PUT", "is_https": false, "location": {"lat" : 40.7128, "lon" : -74.0060}, "message": "Do. Or do not. There is no try." }
27+
- { "index": { } }
28+
- { "@timestamp": "2024-02-12T10:32:00Z", "host.name": "foo", "agent_id": "obi-wan", "process_id": 103, "http_method": "GET", "is_https": false, "location": {"lat" : 40.7128, "lon" : -74.0060}, "message": "May the force be with you." }
29+
- { "index": { } }
30+
- { "@timestamp": "2024-02-12T10:33:00Z", "host.name": "baz", "agent_id": "darth-vader", "process_id": 102, "http_method": "POST", "is_https": true, "location": {"lat" : 40.7128, "lon" : -74.0060}, "message": "I find your lack of faith disturbing." }
31+
- { "index": { } }
32+
- { "@timestamp": "2024-02-12T10:34:00Z", "host.name": "baz", "agent_id": "yoda", "process_id": 104, "http_method": "POST", "is_https": false, "location": {"lat" : 40.7128, "lon" : -74.0060}, "message": "Wars not make one great." }
33+
- { "index": { } }
34+
- { "@timestamp": "2024-02-12T10:35:00Z", "host.name": "foo", "agent_id": "obi-wan", "process_id": 105, "http_method": "GET", "is_https": false, "location": {"lat" : 40.7128, "lon" : -74.0060}, "message": "That's no moon. It's a space station." }
35+
36+
---
37+
teardown:
38+
- do:
39+
indices.delete:
40+
index: my-index
41+
42+
---
43+
"Simple from":
44+
- do:
45+
esql.query:
46+
body:
47+
query: 'FROM my-index | SORT @timestamp | LIMIT 1'
48+
49+
- match: {columns.0.name: "@timestamp"}
50+
- match: {columns.0.type: "date"}
51+
- match: {columns.1.name: "message"}
52+
- match: {columns.1.type: "text"}
53+
54+
- match: {values.0.0: "2024-02-12T10:30:00.000Z"}
55+
- match: {values.0.1: "No, I am your father."}
56+
57+
---
58+
"FROM with INSIST_🐔and LIMIT 1":
59+
- do:
60+
esql.query:
61+
body:
62+
query: 'FROM my-index | INSIST_🐔 host.name, agent_id, http_method | SORT @timestamp | KEEP host.name, agent_id, http_method | LIMIT 1'
63+
64+
- match: {columns.0.name: "host.name"}
65+
- match: {columns.0.type: "keyword"}
66+
- match: {columns.1.name: "agent_id"}
67+
- match: {columns.1.type: "keyword"}
68+
- match: {columns.2.name: "http_method"}
69+
- match: {columns.2.type: "keyword"}
70+
71+
- match: {values.0.0: "foo"}
72+
- match: {values.0.1: "darth-vader"}
73+
- match: {values.0.2: "GET"}
74+
75+
---
76+
"FROM with INSIST_🐔":
77+
- requires:
78+
test_runner_features: allowed_warnings_regex
79+
- do:
80+
allowed_warnings_regex:
81+
- "No limit defined, adding default limit of \\[.*\\]"
82+
esql.query:
83+
body:
84+
query: 'FROM my-index | INSIST_🐔 agent_id | SORT @timestamp | KEEP agent_id'
85+
86+
- match: {columns.0.name: "agent_id"}
87+
- match: {columns.0.type: "keyword"}
88+
89+
- match: {values.0.0: "darth-vader"}
90+
- match: {values.1.0: "yoda"}
91+
- match: {values.2.0: "obi-wan"}
92+
- match: {values.3.0: "darth-vader"}
93+
- match: {values.4.0: "yoda"}
94+
- match: {values.5.0: "obi-wan"}

0 commit comments

Comments
 (0)