Skip to content

Commit e0e6402

Browse files
alex-spiescbuescher
authored andcommitted
ESQL: Fix synthetic attribute pruning (elastic#111413)
- Fix ProjectAwayColumns to handle synthetic attributes and mark synthetically introduced Aliases as synthetic again. - Fix QueryPlan.references() - Simplify ProjectAwayColumns. - Simplify DependencyConsistency. - Make AggregateExec track the intermediate attributes it actually outputs/requries.
1 parent bfc42d9 commit e0e6402

File tree

48 files changed

+656
-345
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

48 files changed

+656
-345
lines changed

docs/changelog/111413.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 111413
2+
summary: "ESQL: Fix synthetic attribute pruning"
3+
area: ES|QL
4+
type: bug
5+
issues:
6+
- 105821

server/src/main/java/org/elasticsearch/TransportVersions.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,7 @@ static TransportVersion def(int id) {
205205
public static final TransportVersion GET_DATA_STREAMS_VERBOSE = def(8_735_00_0);
206206
public static final TransportVersion ESQL_ADD_INDEX_MODE_CONCRETE_INDICES = def(8_736_00_0);
207207
public static final TransportVersion UNASSIGNED_PRIMARY_COUNT_ON_CLUSTER_HEALTH = def(8_737_00_0);
208+
public static final TransportVersion ESQL_AGGREGATE_EXEC_TRACKS_INTERMEDIATE_ATTRS = def(8_738_00_0);
208209

209210
/*
210211
* STOP! READ THIS FIRST! No, really,

x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/Attribute.java

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,11 @@
2828
* The rest are not as they are not part of the projection and thus are not part of the derived table.
2929
*/
3030
public abstract class Attribute extends NamedExpression {
31+
/**
32+
* Changing this will break bwc with 8.15, see {@link FieldAttribute#fieldName()}.
33+
*/
34+
protected static final String SYNTHETIC_ATTRIBUTE_NAME_PREFIX = "$$";
35+
3136
public static List<NamedWriteableRegistry.Entry> getNamedWriteables() {
3237
// TODO add UnsupportedAttribute when these are moved to the same project
3338
return List.of(FieldAttribute.ENTRY, MetadataAttribute.ENTRY, ReferenceAttribute.ENTRY);
@@ -49,6 +54,10 @@ public Attribute(Source source, String name, Nullability nullability, NameId id,
4954
this.nullability = nullability;
5055
}
5156

57+
public static String rawTemporaryName(String inner, String outer, String suffix) {
58+
return SYNTHETIC_ATTRIBUTE_NAME_PREFIX + inner + "$" + outer + "$" + suffix;
59+
}
60+
5261
@Override
5362
public final Expression replaceChildren(List<Expression> newChildren) {
5463
throw new UnsupportedOperationException("this type of node doesn't have any children to replace");
@@ -123,7 +132,7 @@ public boolean equals(Object obj) {
123132

124133
@Override
125134
public String toString() {
126-
return name() + "{" + label() + "}" + "#" + id();
135+
return name() + "{" + label() + (synthetic() ? "$" : "") + "}" + "#" + id();
127136
}
128137

129138
@Override

x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/FieldAttribute.java

Lines changed: 19 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,6 @@
3030
* - nestedParent - if nested, what's the parent (which might not be the immediate one)
3131
*/
3232
public class FieldAttribute extends TypedAttribute {
33-
// TODO: This constant should not be used if possible; use .synthetic()
34-
// https://github.com/elastic/elasticsearch/issues/105821
35-
public static final String SYNTHETIC_ATTRIBUTE_NAME_PREFIX = "$$";
3633

3734
static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(
3835
Attribute.class,
@@ -52,6 +49,10 @@ public FieldAttribute(Source source, FieldAttribute parent, String name, EsField
5249
this(source, parent, name, field, Nullability.TRUE, null, false);
5350
}
5451

52+
public FieldAttribute(Source source, FieldAttribute parent, String name, EsField field, boolean synthetic) {
53+
this(source, parent, name, field, Nullability.TRUE, null, synthetic);
54+
}
55+
5556
public FieldAttribute(
5657
Source source,
5758
FieldAttribute parent,
@@ -64,7 +65,11 @@ public FieldAttribute(
6465
this(source, parent, name, field.getDataType(), field, nullability, id, synthetic);
6566
}
6667

67-
public FieldAttribute(
68+
/**
69+
* Used only for testing. Do not use this otherwise, as an explicitly set type will be ignored the next time this FieldAttribute is
70+
* {@link FieldAttribute#clone}d.
71+
*/
72+
FieldAttribute(
6873
Source source,
6974
FieldAttribute parent,
7075
String name,
@@ -147,28 +152,7 @@ public String getWriteableName() {
147152

148153
@Override
149154
protected NodeInfo<FieldAttribute> info() {
150-
return NodeInfo.create(
151-
this,
152-
(source, parent1, name, type, field1, qualifier, nullability, id, synthetic) -> new FieldAttribute(
153-
source,
154-
parent1,
155-
name,
156-
type,
157-
field1,
158-
qualifier,
159-
nullability,
160-
id,
161-
synthetic
162-
),
163-
parent,
164-
name(),
165-
dataType(),
166-
field,
167-
(String) null,
168-
nullable(),
169-
id(),
170-
synthetic()
171-
);
155+
return NodeInfo.create(this, FieldAttribute::new, parent, name(), dataType(), field, (String) null, nullable(), id(), synthetic());
172156
}
173157

174158
public FieldAttribute parent() {
@@ -185,9 +169,9 @@ public String path() {
185169
public String fieldName() {
186170
// Before 8.15, the field name was the same as the attribute's name.
187171
// On later versions, the attribute can be renamed when creating synthetic attributes.
188-
// TODO: We should use synthetic() to check for that case.
189-
// https://github.com/elastic/elasticsearch/issues/105821
190-
if (name().startsWith(SYNTHETIC_ATTRIBUTE_NAME_PREFIX) == false) {
172+
// Because until 8.15, we couldn't set `synthetic` to true due to a bug, in that version such FieldAttributes are marked by their
173+
// name starting with `$$`.
174+
if ((synthetic() || name().startsWith(SYNTHETIC_ATTRIBUTE_NAME_PREFIX)) == false) {
191175
return name();
192176
}
193177
return Strings.hasText(path) ? path + "." + field.getName() : field.getName();
@@ -211,9 +195,15 @@ private FieldAttribute innerField(EsField type) {
211195

212196
@Override
213197
protected Attribute clone(Source source, String name, DataType type, Nullability nullability, NameId id, boolean synthetic) {
198+
// Ignore `type`, this must be the same as the field's type.
214199
return new FieldAttribute(source, parent, name, field, nullability, id, synthetic);
215200
}
216201

202+
@Override
203+
public Attribute withDataType(DataType type) {
204+
throw new UnsupportedOperationException("FieldAttribute obtains its type from the contained EsField.");
205+
}
206+
217207
@Override
218208
public int hashCode() {
219209
return Objects.hash(super.hashCode(), path, field);

x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/NamedExpression.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,10 @@ public NameId id() {
5151
return id;
5252
}
5353

54+
/**
55+
* Synthetic named expressions are not user defined and usually created during optimizations and substitutions, e.g. when turning
56+
* {@code ... | STATS x = avg(2*field)} into {@code ... | EVAL $$synth$attribute = 2*field | STATS x = avg($$synth$attribute)}.
57+
*/
5458
public boolean synthetic() {
5559
return synthetic;
5660
}
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0; you may not use this file except in compliance with the Elastic License
5+
* 2.0.
6+
*/
7+
8+
package org.elasticsearch.xpack.esql.core.expression;
9+
10+
import org.elasticsearch.xpack.esql.core.tree.Source;
11+
import org.elasticsearch.xpack.esql.core.type.DataType;
12+
import org.elasticsearch.xpack.esql.core.type.EsField;
13+
14+
public class FieldAttributeTestUtils {
15+
public static final FieldAttribute newFieldAttributeWithType(
16+
Source source,
17+
FieldAttribute parent,
18+
String name,
19+
DataType type,
20+
EsField field,
21+
Nullability nullability,
22+
NameId id,
23+
boolean synthetic
24+
) {
25+
return new FieldAttribute(source, parent, name, type, field, nullability, id, synthetic);
26+
}
27+
}

x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/AggregatorMode.java

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,24 @@
99

1010
public enum AggregatorMode {
1111

12+
/**
13+
* Maps raw inputs to intermediate outputs.
14+
*/
1215
INITIAL(false, true),
1316

17+
/**
18+
* Maps intermediate inputs to intermediate outputs.
19+
*/
1420
INTERMEDIATE(true, true),
1521

22+
/**
23+
* Maps intermediate inputs to final outputs.
24+
*/
1625
FINAL(true, false),
1726

18-
// most useful for testing
27+
/**
28+
* Maps raw inputs to final outputs. Most useful for testing.
29+
*/
1930
SINGLE(false, false);
2031

2132
private final boolean inputPartial;

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@
2626
import org.elasticsearch.xpack.esql.core.expression.Expressions;
2727
import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
2828
import org.elasticsearch.xpack.esql.core.expression.Literal;
29-
import org.elasticsearch.xpack.esql.core.expression.NameId;
3029
import org.elasticsearch.xpack.esql.core.expression.NamedExpression;
3130
import org.elasticsearch.xpack.esql.core.expression.Nullability;
3231
import org.elasticsearch.xpack.esql.core.expression.ReferenceAttribute;
@@ -65,7 +64,6 @@
6564
import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.EsqlArithmeticOperation;
6665
import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.In;
6766
import org.elasticsearch.xpack.esql.index.EsIndex;
68-
import org.elasticsearch.xpack.esql.optimizer.LogicalPlanOptimizer;
6967
import org.elasticsearch.xpack.esql.plan.TableIdentifier;
7068
import org.elasticsearch.xpack.esql.plan.logical.Drop;
7169
import org.elasticsearch.xpack.esql.plan.logical.Enrich;
@@ -1249,12 +1247,10 @@ private Expression createIfDoesNotAlreadyExist(
12491247
List<FieldAttribute> unionFieldAttributes
12501248
) {
12511249
// Generate new ID for the field and suffix it with the data type to maintain unique attribute names.
1252-
String unionTypedFieldName = LogicalPlanOptimizer.rawTemporaryName(
1253-
fa.name(),
1254-
"converted_to",
1255-
resolvedField.getDataType().typeName()
1256-
);
1257-
FieldAttribute unionFieldAttribute = new FieldAttribute(fa.source(), fa.parent(), unionTypedFieldName, resolvedField);
1250+
// NOTE: The name has to start with $$ to not break bwc with 8.15 - in that version, this is how we had to mark this as
1251+
// synthetic to work around a bug.
1252+
String unionTypedFieldName = Attribute.rawTemporaryName(fa.name(), "converted_to", resolvedField.getDataType().typeName());
1253+
FieldAttribute unionFieldAttribute = new FieldAttribute(fa.source(), fa.parent(), unionTypedFieldName, resolvedField, true);
12581254
int existingIndex = unionFieldAttributes.indexOf(unionFieldAttribute);
12591255
if (existingIndex >= 0) {
12601256
// Do not generate multiple name/type combinations with different IDs
@@ -1280,8 +1276,16 @@ private MultiTypeEsField resolvedMultiTypeEsField(FieldAttribute fa, HashMap<Typ
12801276

12811277
private Expression typeSpecificConvert(AbstractConvertFunction convert, Source source, DataType type, InvalidMappedField mtf) {
12821278
EsField field = new EsField(mtf.getName(), type, mtf.getProperties(), mtf.isAggregatable());
1283-
NameId id = ((FieldAttribute) convert.field()).id();
1284-
FieldAttribute resolvedAttr = new FieldAttribute(source, null, field.getName(), field, Nullability.TRUE, id, false);
1279+
FieldAttribute originalFieldAttr = (FieldAttribute) convert.field();
1280+
FieldAttribute resolvedAttr = new FieldAttribute(
1281+
source,
1282+
originalFieldAttr.parent(),
1283+
originalFieldAttr.name(),
1284+
field,
1285+
originalFieldAttr.nullable(),
1286+
originalFieldAttr.id(),
1287+
true
1288+
);
12851289
return convert.replaceChildren(Collections.singletonList(resolvedAttr));
12861290
}
12871291
}
@@ -1330,11 +1334,11 @@ private static LogicalPlan planWithoutSyntheticAttributes(LogicalPlan plan) {
13301334
List<Attribute> newOutput = new ArrayList<>(output.size());
13311335

13321336
for (Attribute attr : output) {
1333-
// TODO: this should really use .synthetic()
1334-
// https://github.com/elastic/elasticsearch/issues/105821
1335-
if (attr.name().startsWith(FieldAttribute.SYNTHETIC_ATTRIBUTE_NAME_PREFIX) == false) {
1336-
newOutput.add(attr);
1337+
// Do not let the synthetic union type field attributes end up in the final output.
1338+
if (attr.synthetic() && attr instanceof FieldAttribute) {
1339+
continue;
13371340
}
1341+
newOutput.add(attr);
13381342
}
13391343

13401344
return newOutput.size() == output.size() ? plan : new Project(Source.EMPTY, plan, newOutput);

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizer.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -552,6 +552,10 @@ private Tuple<List<Attribute>, List<Stat>> pushableStats(AggregateExec aggregate
552552
singletonList(agg),
553553
emptyList()
554554
);
555+
// TODO: the attributes have been recreated here; they will have wrong name ids, and the dependency check will
556+
// probably fail when we fix https://github.com/elastic/elasticsearch/issues/105436.
557+
// We may need to refactor AbstractPhysicalOperationProviders.intermediateAttributes so it doesn't return just
558+
// a list of attributes, but a mapping from the logical to the physical attributes.
555559
tuple.v1().addAll(intermediateAttributes);
556560
tuple.v2().add(stat);
557561
}
@@ -604,6 +608,7 @@ && allowedForDocValues(fieldAttribute, agg, foundAttributes)) {
604608
agg.groupings(),
605609
orderedAggregates,
606610
agg.getMode(),
611+
agg.intermediateAttributes(),
607612
agg.estimatedRowSize()
608613
);
609614
}

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
import org.elasticsearch.xpack.esql.core.expression.AttributeMap;
1616
import org.elasticsearch.xpack.esql.core.expression.Expression;
1717
import org.elasticsearch.xpack.esql.core.expression.Expressions;
18-
import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
1918
import org.elasticsearch.xpack.esql.core.expression.NameId;
2019
import org.elasticsearch.xpack.esql.core.expression.NamedExpression;
2120
import org.elasticsearch.xpack.esql.core.expression.ReferenceAttribute;
@@ -123,15 +122,11 @@ public LogicalPlanOptimizer(LogicalOptimizerContext optimizerContext) {
123122
public static String temporaryName(Expression inner, Expression outer, int suffix) {
124123
String in = toString(inner);
125124
String out = toString(outer);
126-
return rawTemporaryName(in, out, String.valueOf(suffix));
125+
return Attribute.rawTemporaryName(in, out, String.valueOf(suffix));
127126
}
128127

129128
public static String locallyUniqueTemporaryName(String inner, String outer) {
130-
return FieldAttribute.SYNTHETIC_ATTRIBUTE_NAME_PREFIX + inner + "$" + outer + "$" + new NameId();
131-
}
132-
133-
public static String rawTemporaryName(String inner, String outer, String suffix) {
134-
return FieldAttribute.SYNTHETIC_ATTRIBUTE_NAME_PREFIX + inner + "$" + outer + "$" + suffix;
129+
return Attribute.rawTemporaryName(inner, outer, (new NameId()).toString());
135130
}
136131

137132
static String toString(Expression ex) {
@@ -373,9 +368,7 @@ private static AttributeReplacement renameAttributesInExpressions(
373368
if (attributeNamesToRename.contains(attr.name())) {
374369
Alias renamedAttribute = aliasesForReplacedAttributes.computeIfAbsent(attr, a -> {
375370
String tempName = locallyUniqueTemporaryName(a.name(), "temp_name");
376-
// TODO: this should be synthetic
377-
// blocked on https://github.com/elastic/elasticsearch/issues/98703
378-
return new Alias(a.source(), tempName, a, null, false);
371+
return new Alias(a.source(), tempName, a, null, true);
379372
});
380373
return renamedAttribute.toAttribute();
381374
}

0 commit comments

Comments
 (0)