Skip to content

Commit 7aeb5b9

Browse files
authored
[ES|QL] unmapped_fields="load" should not try to load subfields of flattened fields (#144190)
- Expand the field caps request so that, for queries referencing "foo.bar", it also retrieves the mapping for "foo", in addition to "foo.bar" and "foo.bar.*". This is only done when unmapped_fields="load".
- Prevent loading subfields of flattened fields in the verifier.
- Add yaml tests, unit tests, and a capability.
- Temporarily ignore csv tests that load subfields of flattened fields while setting unmapped_fields="load".
1 parent ca64596 commit 7aeb5b9

File tree

8 files changed

+674
-152
lines changed

8 files changed

+674
-152
lines changed

x-pack/plugin/esql/qa/testFixtures/src/main/resources/flattened.csv-spec

Lines changed: 28 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,8 @@ FROM flattened_otel_logs
1414
;
1515

1616
// ----------------------------------------------------
17-
flattened KEEP subfield
17+
# Temporarily ignored since loading subfields of flattened type is not allowed
18+
flattened KEEP subfield-Ignore
1819
required_capability: load_flattened_field
1920
required_capability: optional_fields_v2
2021

@@ -38,7 +39,8 @@ FROM flattened_otel_logs
3839
;
3940

4041
// ----------------------------------------------------
41-
flattened DROP subfield
42+
# Temporarily ignored since loading subfields of flattened type is not allowed
43+
flattened DROP subfield-Ignore
4244
required_capability: load_flattened_field
4345
required_capability: optional_fields_v2
4446

@@ -63,7 +65,8 @@ FROM flattened_otel_logs
6365
;
6466

6567
// ----------------------------------------------------
66-
flattened DROP subfield after KEEP
68+
# Temporarily ignored since loading subfields of flattened type is not allowed
69+
flattened DROP subfield after KEEP-Ignore
6770
required_capability: load_flattened_field
6871
required_capability: optional_fields_v2
6972

@@ -88,7 +91,8 @@ FROM flattened_otel_logs
8891
;
8992

9093
// ----------------------------------------------------
91-
flattened subfield with WHERE
94+
# Temporarily ignored since loading subfields of flattened type is not allowed
95+
flattened subfield with WHERE-Ignore
9296
required_capability: load_flattened_field
9397
required_capability: optional_fields_v2
9498

@@ -104,7 +108,8 @@ FROM flattened_otel_logs
104108
;
105109

106110
// ----------------------------------------------------
107-
flattened subfield with WHERE and KEEP *
111+
# Temporarily ignored since loading subfields of flattened type is not allowed
112+
flattened subfield with WHERE and KEEP *-Ignore
108113
required_capability: load_flattened_field
109114
required_capability: optional_fields_v2
110115

@@ -121,7 +126,8 @@ FROM flattened_otel_logs
121126
;
122127

123128
// ----------------------------------------------------
124-
flattened subfield with WHERE and KEEP specific subfields
129+
# Temporarily ignored since loading subfields of flattened type is not allowed
130+
flattened subfield with WHERE and KEEP specific subfields-Ignore
125131
required_capability: load_flattened_field
126132
required_capability: optional_fields_v2
127133

@@ -138,7 +144,8 @@ FROM flattened_otel_logs
138144
;
139145

140146
// ----------------------------------------------------
141-
flattened subfield with EVAL TO_UPPER
147+
# Temporarily ignored since loading subfields of flattened type is not allowed
148+
flattened subfield with EVAL TO_UPPER-Ignore
142149
required_capability: load_flattened_field
143150
required_capability: optional_fields_v2
144151

@@ -163,7 +170,8 @@ FROM flattened_otel_logs
163170
;
164171

165172
// ----------------------------------------------------
166-
flattened subfield with EVAL LENGTH
173+
# Temporarily ignored since loading subfields of flattened type is not allowed
174+
flattened subfield with EVAL LENGTH-Ignore
167175
required_capability: load_flattened_field
168176
required_capability: optional_fields_v2
169177

@@ -188,7 +196,8 @@ FROM flattened_otel_logs
188196
;
189197

190198
// ----------------------------------------------------
191-
SORT on flattened subfield
199+
# Temporarily ignored since loading subfields of flattened type is not allowed
200+
SORT on flattened subfield-Ignore
192201
required_capability: load_flattened_field
193202
required_capability: optional_fields_v2
194203

@@ -212,7 +221,8 @@ FROM flattened_otel_logs
212221
;
213222

214223
// ----------------------------------------------------
215-
RENAME flattened subfield
224+
# Temporarily ignored since loading subfields of flattened type is not allowed
225+
RENAME flattened subfield-Ignore
216226
required_capability: load_flattened_field
217227
required_capability: optional_fields_v2
218228

@@ -237,7 +247,8 @@ FROM flattened_otel_logs
237247
;
238248

239249
// ----------------------------------------------------
240-
flattened subfield with STATS count
250+
# Temporarily ignored since loading subfields of flattened type is not allowed
251+
flattened subfield with STATS count-Ignore
241252
required_capability: load_flattened_field
242253
required_capability: optional_fields_v2
243254

@@ -251,7 +262,8 @@ count(*):long | resource.attributes.agent.type:keyword
251262
;
252263

253264
// ----------------------------------------------------
254-
flattened subfield with STATS count_distinct
265+
# Temporarily ignored since loading subfields of flattened type is not allowed
266+
flattened subfield with STATS count_distinct-Ignore
255267
required_capability: load_flattened_field
256268
required_capability: optional_fields_v2
257269

@@ -265,7 +277,8 @@ count_distinct(resource.attributes.host.name):long
265277
;
266278

267279
// ----------------------------------------------------
268-
flattened subfield with INLINE STATS
280+
# Temporarily ignored since loading subfields of flattened type is not allowed
281+
flattened subfield with INLINE STATS-Ignore
269282
required_capability: load_flattened_field
270283
required_capability: optional_fields_v2
271284

@@ -286,7 +299,8 @@ FROM flattened_otel_logs
286299
;
287300

288301
// ----------------------------------------------------
289-
flattened subfield with MV_EXPAND
302+
# Temporarily ignored since loading subfields of flattened type is not allowed
303+
flattened subfield with MV_EXPAND-Ignore
290304
required_capability: load_flattened_field
291305
required_capability: optional_fields_v2
292306

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2347,6 +2347,12 @@ public enum Cap {
23472347
*/
23482348
CHANGE_POINT_SUPPORT_NULL_COLUMN,
23492349

2350+
/**
2351+
* Reject loading sub-fields of flattened fields when {@code unmapped_fields="load"}.
2352+
* See https://github.com/elastic/elasticsearch/issues/143494
2353+
*/
2354+
REJECT_LOADING_FLATTENED_SUBFIELDS,
2355+
23502356
// Last capability should still have a comma for fewer merge conflicts when adding new ones :)
23512357
// This comment prevents the semicolon from being on the previous capability when Spotless formats the file.
23522358
;

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
package org.elasticsearch.xpack.esql.analysis;
99

1010
import org.elasticsearch.index.IndexMode;
11+
import org.elasticsearch.index.mapper.flattened.FlattenedFieldMapper;
1112
import org.elasticsearch.license.XPackLicenseState;
1213
import org.elasticsearch.xpack.esql.LicenseAware;
1314
import org.elasticsearch.xpack.esql.capabilities.ConfigurationAware;
@@ -19,13 +20,16 @@
1920
import org.elasticsearch.xpack.esql.core.expression.Alias;
2021
import org.elasticsearch.xpack.esql.core.expression.Attribute;
2122
import org.elasticsearch.xpack.esql.core.expression.Expression;
23+
import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
2224
import org.elasticsearch.xpack.esql.core.expression.MetadataAttribute;
2325
import org.elasticsearch.xpack.esql.core.expression.TypeResolutions;
2426
import org.elasticsearch.xpack.esql.core.expression.UnresolvedTimestamp;
2527
import org.elasticsearch.xpack.esql.core.expression.function.Function;
2628
import org.elasticsearch.xpack.esql.core.expression.predicate.operator.comparison.BinaryComparison;
2729
import org.elasticsearch.xpack.esql.core.tree.Node;
2830
import org.elasticsearch.xpack.esql.core.type.DataType;
31+
import org.elasticsearch.xpack.esql.core.type.PotentiallyUnmappedKeywordEsField;
32+
import org.elasticsearch.xpack.esql.core.type.UnsupportedEsField;
2933
import org.elasticsearch.xpack.esql.core.util.Holder;
3034
import org.elasticsearch.xpack.esql.expression.function.TimestampAware;
3135
import org.elasticsearch.xpack.esql.expression.function.UnsupportedAttribute;
@@ -49,6 +53,7 @@
4953
import org.elasticsearch.xpack.esql.plan.logical.UnaryPlan;
5054
import org.elasticsearch.xpack.esql.plan.logical.UnionAll;
5155
import org.elasticsearch.xpack.esql.plan.logical.promql.PromqlCommand;
56+
import org.elasticsearch.xpack.esql.session.FieldNameUtils;
5257
import org.elasticsearch.xpack.esql.telemetry.FeatureMetric;
5358
import org.elasticsearch.xpack.esql.telemetry.Metrics;
5459

@@ -126,6 +131,7 @@ Collection<Failure> verify(LogicalPlan plan, BitSet partialMetrics, UnmappedReso
126131
if (unmappedResolution == UnmappedResolution.LOAD) {
127132
checkLoadModeDisallowedCommands(plan, failures);
128133
checkLoadModeDisallowedFunctions(plan, failures);
134+
checkFlattenedSubFieldLoad(plan, failures);
129135
}
130136

131137
// collect plan checkers
@@ -454,6 +460,58 @@ private static void checkLoadModeDisallowedFunctions(LogicalPlan plan, Failures
454460
);
455461
}
456462

463+
/**
464+
* Reject loading sub-fields of flattened fields when {@code unmapped_fields="load"}, by checking if any
465+
* {@link PotentiallyUnmappedKeywordEsField} is a sub-field of a parent field whose original type is flattened. The reason is that
466+
* the resolution of flattened sub-fields may eventually differ from what happens when {@code unmapped_fields="load"} is used.
* <p>
* The check is applied to every {@link EsRelation} found in the plan. For each relation, the names of its flattened output fields
* are collected first; then every output attribute backed by a {@link PotentiallyUnmappedKeywordEsField} has its dot-delimited
* parent prefixes compared against those names. At most one failure is recorded per offending attribute.
*
* @param plan the logical plan whose {@link EsRelation} nodes are inspected
* @param failures collector that receives one failure for each sub-field found under a flattened parent
467+
*/
468+
private static void checkFlattenedSubFieldLoad(LogicalPlan plan, Failures failures) {
469+
plan.forEachDown(EsRelation.class, esRelation -> {
470+
Set<String> flattenedFieldNames = flattenedFieldNames(esRelation.output());
471+
472+
// Nothing to verify for this relation when it exposes no flattened field.
if (flattenedFieldNames.isEmpty()) {
473+
return;
474+
}
475+
476+
for (Attribute attr : esRelation.output()) {
477+
// Only fields that are potentially unmapped are candidates for the check; everything else is skipped.
// NOTE(review): other conditions in this change are negated with "== false" rather than "!" - consider aligning.
if (!(attr instanceof FieldAttribute fa && fa.field() instanceof PotentiallyUnmappedKeywordEsField)) {
478+
continue;
479+
}
480+
481+
String name = fa.name();
482+
// For a name such as "a.b.c" the prefixes are ["a", "a.b"]; the name itself is not one of them.
List<String> prefixes = FieldNameUtils.parentPrefixes(name);
483+
for (String parent : prefixes) {
484+
if (flattenedFieldNames.contains(parent)) {
485+
// It is sufficient to find "a" flattened field with a name matching the parent's.
486+
Failure failure = fail(
487+
fa,
488+
"Loading subfield [{}] when parent [{}] is of flattened field type is not supported with "
489+
+ "unmapped_fields=\"load\"",
490+
name,
491+
parent
492+
);
493+
failures.add(failure);
494+
// One failure per attribute is enough; stop looking at longer prefixes.
break;
495+
}
496+
}
497+
}
498+
});
499+
}
500+
501+
/**
* Collects the names of the attributes that represent flattened fields. An attribute qualifies when it is a
* {@link FieldAttribute} whose field is an {@link UnsupportedEsField} and whose original types contain
* {@link FlattenedFieldMapper#CONTENT_TYPE}.
*
* @param attributes the attributes to inspect
* @return a new set with the names of the qualifying attributes; empty when none qualifies
*/
private static Set<String> flattenedFieldNames(List<Attribute> attributes) {
502+
Set<String> names = new HashSet<>();
503+
504+
for (Attribute attribute : attributes) {
505+
// All three conditions must hold: a field attribute, backed by an unsupported field, with "flattened" among its original types.
if (attribute instanceof FieldAttribute fa
506+
&& fa.field() instanceof UnsupportedEsField uef
507+
&& uef.getOriginalTypes().contains(FlattenedFieldMapper.CONTENT_TYPE)) {
508+
names.add(fa.name());
509+
}
510+
}
511+
512+
return names;
513+
}
514+
457515
private void licenseCheck(LogicalPlan plan, Failures failures) {
458516
Consumer<Node<?>> licenseCheck = n -> {
459517
if (n instanceof LicenseAware la && la.licenseCheck(licenseState) == false) {

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -797,8 +797,11 @@ public void analyzedPlan(
797797
// Initialize the PreAnalysisResult with the local cluster's minimum transport version, so our planning will be correct also in
798798
// case of ROW queries. ROW queries can still require inter-node communication (for ENRICH and LOOKUP JOIN execution) with an older
799799
// node in the same cluster; so assuming that all nodes are on the same version as this node will be wrong and may cause bugs.
800-
PreAnalysisResult result = FieldNameUtils.resolveFieldNames(parsed, preAnalysis.enriches().isEmpty() == false)
801-
.withMinimumTransportVersion(localClusterMinimumVersion);
800+
PreAnalysisResult result = FieldNameUtils.resolveFieldNames(
801+
parsed,
802+
preAnalysis.enriches().isEmpty() == false,
803+
unmappedResolution == UnmappedResolution.LOAD
804+
).withMinimumTransportVersion(localClusterMinimumVersion);
802805
String description = requestFilter == null ? "the only attempt without filter" : "first attempt with filter";
803806

804807
resolveIndicesAndAnalyze(

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/FieldNameUtils.java

Lines changed: 41 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -48,14 +48,13 @@
4848
import org.elasticsearch.xpack.esql.plan.logical.join.LookupJoin;
4949
import org.elasticsearch.xpack.esql.session.EsqlSession.PreAnalysisResult;
5050

51+
import java.util.ArrayList;
5152
import java.util.HashSet;
5253
import java.util.List;
5354
import java.util.Locale;
5455
import java.util.Set;
5556
import java.util.function.BiConsumer;
56-
import java.util.stream.Stream;
5757

58-
import static java.util.stream.Collectors.toSet;
5958
import static org.elasticsearch.xpack.esql.core.util.StringUtils.WILDCARD;
6059

6160
public class FieldNameUtils {
@@ -65,7 +64,7 @@ public class FieldNameUtils {
6564
TRange.NAME.toLowerCase(Locale.ROOT)
6665
);
6766

68-
public static PreAnalysisResult resolveFieldNames(LogicalPlan parsed, boolean hasEnriches) {
67+
public static PreAnalysisResult resolveFieldNames(LogicalPlan parsed, boolean hasEnriches, boolean includePrefixFields) {
6968

7069
// get the field names from the parsed plan combined with the ENRICH match fields from the ENRICH policy
7170
List<LogicalPlan> inlinestats = parsed.collect(InlineStats.class::isInstance);
@@ -297,14 +296,50 @@ public static PreAnalysisResult resolveFieldNames(LogicalPlan parsed, boolean ha
297296
// there cannot be an empty list of fields, we'll ask the simplest and lightest one instead: _index
298297
return new PreAnalysisResult(IndexResolver.INDEX_METADATA_FIELD, wildcardJoinIndices);
299298
} else {
300-
HashSet<String> allFields = new HashSet<>(fieldNames.stream().flatMap(FieldNameUtils::withSubfields).collect(toSet()));
299+
Set<String> allFields = new HashSet<>();
300+
for (String name : fieldNames) {
301+
addRelatedFields(includePrefixFields, allFields, name);
302+
}
301303
allFields.add(MetadataAttribute.INDEX);
302304
return new PreAnalysisResult(allFields, wildcardJoinIndices);
303305
}
304306
}
305307

306-
private static Stream<String> withSubfields(String name) {
307-
return name.endsWith(WILDCARD) ? Stream.of(name) : Stream.of(name, name + ".*");
308+
/**
309+
* Expands a field name into a set of names to request from field caps. For example, "a.b.c" will be expanded to:
310+
* <ul>
311+
* <li>The field itself: "a.b.c"</li>
312+
* <li>Its multi-fields: "a.b.c.*". A sample case where this is required is TEXT fields that may have a ".keyword" subfield that's
313+
* implicitly used in some queries. This entry is skipped when the name already ends with the wildcard.</li>
314+
* <li>(Only when {@code unmapped_fields="load"}) All dot-delimited parent prefixes: ["a", "a.b"]. This is needed to get back flattened
315+
* parents, so the verifier can detect sub-fields of flattened fields.</li>
316+
* </ul>
*
* @param includeFieldParentPrefixes whether the dot-delimited parent prefixes of {@code name} are added as well
* @param allFields the set that receives the expanded names; it is modified in place
* @param name the field name (or wildcard pattern) to expand
317+
*/
318+
private static void addRelatedFields(boolean includeFieldParentPrefixes, Set<String> allFields, String name) {
319+
allFields.add(name);
320+
321+
// A name that already ends with the wildcard is not extended with a further ".*" pattern.
if (name.endsWith(WILDCARD) == false) {
322+
allFields.add(name + ".*");
323+
}
324+
325+
if (includeFieldParentPrefixes) {
326+
allFields.addAll(parentPrefixes(name));
327+
}
328+
}
329+
330+
/**
331+
* Returns the dot-delimited parent prefixes of a field name. For example, "a.b.c" will return ["a", "a.b"].
* <p>
* The prefixes are listed from shortest to longest, and the name itself is never part of the result. A name without any dot
* yields an empty list.
*
* @param name the field name to split at each '.'
* @return a new list holding one prefix per '.' found in {@code name}
332+
*/
333+
public static List<String> parentPrefixes(String name) {
334+
List<String> prefixes = new ArrayList<>();
335+
int pos = name.indexOf('.');
336+
337+
// Each iteration records the text before the current dot, then moves on to the next dot (if any).
while (pos != -1) {
338+
prefixes.add(name.substring(0, pos));
339+
pos = name.indexOf('.', pos + 1);
340+
}
341+
342+
return prefixes;
308343
}
309344

310345
/**

0 commit comments

Comments
 (0)