Skip to content

Commit 186a040

Browse files
committed
Implement fix in a different way
1 parent 2b6ae2f commit 186a040

File tree

1 file changed

+83
-45
lines changed

1 file changed

+83
-45
lines changed

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/CombineProjections.java

Lines changed: 83 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,13 @@
1818
import org.elasticsearch.xpack.esql.core.expression.ReferenceAttribute;
1919
import org.elasticsearch.xpack.esql.expression.function.grouping.GroupingFunction;
2020
import org.elasticsearch.xpack.esql.plan.logical.Aggregate;
21+
import org.elasticsearch.xpack.esql.plan.logical.Eval;
2122
import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan;
2223
import org.elasticsearch.xpack.esql.plan.logical.Project;
2324
import org.elasticsearch.xpack.esql.plan.logical.UnaryPlan;
2425

2526
import java.util.ArrayList;
27+
import java.util.HashSet;
2628
import java.util.LinkedHashSet;
2729
import java.util.List;
2830

@@ -57,9 +59,7 @@ protected LogicalPlan rule(UnaryPlan plan) {
5759
// project can be fully removed
5860
if (newAggs != null) {
5961
var newGroups = replacePrunedAliasesUsedInGroupBy(a.groupings(), aggs, newAggs);
60-
if (local == false || newGroups.size() == a.groupings().size()) {
61-
plan = a.with(newGroups, newAggs);
62-
}
62+
plan = a.with(newGroups, newAggs);
6363
}
6464
}
6565
return plan;
@@ -68,21 +68,92 @@ protected LogicalPlan rule(UnaryPlan plan) {
6868
// Agg with underlying Project (group by on sub-queries)
6969
if (plan instanceof Aggregate a && child instanceof Project p) {
7070
var groupings = a.groupings();
71-
List<NamedExpression> groupingAttrs = new ArrayList<>(a.groupings().size());
71+
72+
// sanity checks
7273
for (Expression grouping : groupings) {
73-
if (grouping instanceof Attribute attribute) {
74-
groupingAttrs.add(attribute);
75-
} else if (grouping instanceof Alias as && as.child() instanceof GroupingFunction.NonEvaluatableGroupingFunction) {
76-
groupingAttrs.add(as);
77-
} else {
74+
if ((grouping instanceof Attribute
75+
|| grouping instanceof Alias as && as.child() instanceof GroupingFunction.NonEvaluatableGroupingFunction) == false) {
7876
// After applying ReplaceAggregateNestedExpressionWithEval,
7977
// evaluatable groupings can only contain attributes.
80-
throw new EsqlIllegalArgumentException("Expected an Attribute, got {}", grouping);
78+
throw new EsqlIllegalArgumentException("Expected an attribute or grouping function, got {}", grouping);
8179
}
8280
}
83-
List<Expression> newGroupings = combineUpperGroupingsAndLowerProjections(groupingAttrs, p.projections());
84-
if (local == false || newGroupings.size() == a.groupings().size()) {
81+
assert groupings.size() <= 1
82+
|| groupings.stream()
83+
.anyMatch(group -> group.anyMatch(expr -> expr instanceof GroupingFunction.NonEvaluatableGroupingFunction)) == false
84+
: "CombineProjections only tested with a single CATEGORIZE with no additional groups";
85+
86+
// Collect the alias map for resolving the source (f1 = 1, f2 = f1, etc..)
87+
AttributeMap.Builder<Attribute> aliasesBuilder = AttributeMap.builder();
88+
for (NamedExpression ne : p.projections()) {
89+
// Record the aliases.
90+
// Projections are just aliases for attributes, so casting is safe.
91+
aliasesBuilder.put(ne.toAttribute(), (Attribute) Alias.unwrap(ne));
92+
}
93+
var aliases = aliasesBuilder.build();
94+
95+
// Propagate any renames from the lower projection into the upper groupings.
96+
List<Expression> resolvedGroupings = new ArrayList<>();
97+
for (Expression grouping : groupings) {
98+
Expression transformed = grouping.transformUp(Attribute.class, as -> aliases.resolve(as, as));
99+
resolvedGroupings.add(transformed);
100+
}
101+
102+
// This can lead to duplicates in the groupings: e.g.
103+
// | EVAL x = y | STATS ... BY x, y
104+
if (local == false) {
105+
// On the coordinator, we can just discard the duplicates.
106+
107+
// All substitutions happen before; groupings must be attributes at this point except for non-evaluatable groupings which
108+
// will be an alias like `c = CATEGORIZE(attribute)`.
109+
// Due to such aliases, we can't use an AttributeSet to deduplicate. But we can use a regular set to deduplicate based on
110+
// regular equality (i.e. based on names) instead of name ids.
111+
// TODO: The deduplication based on simple equality will be insufficient in case of multiple non-evaluatable groupings, e.g.
112+
// for `| EVAL x = y | STATS ... BY CATEGORIZE(x), CATEGORIZE(y)`. That will require semantic equality instead. Also
113+
// applies in the local case below.
114+
LinkedHashSet<Expression> deduplicatedResolvedGroupings = new LinkedHashSet<>(resolvedGroupings);
115+
List<Expression> newGroupings = new ArrayList<>(deduplicatedResolvedGroupings);
85116
plan = a.with(p.child(), newGroupings, combineProjections(a.aggregates(), p.projections()));
117+
} else {
118+
// On the data node, the groupings must be preserved because they affect the physical output (see
119+
// AbstractPhysicalOperationProviders#intermediateAttributes).
120+
// In case that propagating the lower projection leads to duplicates in the resolved groupings, we'll leave an Eval in place
121+
// of the original projection to create new attributes for the duplicate groups.
122+
HashSet<Expression> seenResolvedGroupings = new HashSet<>(resolvedGroupings.size());
123+
List<Expression> newGroupings = new ArrayList<>();
124+
List<Alias> aliasesAgainstDuplication = new ArrayList<>();
125+
126+
for (int i = 0; i < groupings.size(); i++) {
127+
Expression resolvedGrouping = resolvedGroupings.get(i);
128+
if (seenResolvedGroupings.add(resolvedGrouping)) {
129+
newGroupings.add(resolvedGrouping);
130+
} else {
131+
// resolving the renames leads to a duplicate here - we need to alias the underlying attribute this refers to.
132+
// should really only be 1 attribute, anyway, but going via .references() includes the case of a
133+
// GroupingFunction.NonEvaluatableGroupingFunction.
134+
Attribute coreAttribute = resolvedGrouping.references().iterator().next();
135+
136+
Alias renameAgainstDuplication = new Alias(
137+
coreAttribute.source(),
138+
TemporaryNameUtils.locallyUniqueTemporaryName(coreAttribute.name()),
139+
coreAttribute
140+
);
141+
aliasesAgainstDuplication.add(renameAgainstDuplication);
142+
143+
// propagate the new alias into the new grouping
144+
AttributeMap.Builder<Attribute> resolverBuilder = AttributeMap.builder();
145+
resolverBuilder.put(coreAttribute, renameAgainstDuplication.toAttribute());
146+
resolverBuilder.build();
147+
AttributeMap<Attribute> resolver = resolverBuilder.build();
148+
149+
newGroupings.add(resolvedGrouping.transformUp(Attribute.class, attr -> resolver.resolve(attr, attr)));
150+
}
151+
}
152+
153+
LogicalPlan newChild = aliasesAgainstDuplication.isEmpty()
154+
? p.child()
155+
: new Eval(p.source(), p.child(), aliasesAgainstDuplication);
156+
plan = a.with(newChild, newGroupings, combineProjections(a.aggregates(), p.projections()));
86157
}
87158
}
88159

@@ -145,39 +216,6 @@ private static List<NamedExpression> combineProjections(List<? extends NamedExpr
145216
return replaced;
146217
}
147218

148-
private static List<Expression> combineUpperGroupingsAndLowerProjections(
149-
List<? extends NamedExpression> upperGroupings,
150-
List<? extends NamedExpression> lowerProjections
151-
) {
152-
assert upperGroupings.size() <= 1
153-
|| upperGroupings.stream()
154-
.anyMatch(group -> group.anyMatch(expr -> expr instanceof GroupingFunction.NonEvaluatableGroupingFunction)) == false
155-
: "CombineProjections only tested with a single CATEGORIZE with no additional groups";
156-
// Collect the alias map for resolving the source (f1 = 1, f2 = f1, etc..)
157-
AttributeMap.Builder<Attribute> aliasesBuilder = AttributeMap.builder();
158-
for (NamedExpression ne : lowerProjections) {
159-
// Record the aliases.
160-
// Projections are just aliases for attributes, so casting is safe.
161-
aliasesBuilder.put(ne.toAttribute(), (Attribute) Alias.unwrap(ne));
162-
}
163-
var aliases = aliasesBuilder.build();
164-
165-
// Propagate any renames from the lower projection into the upper groupings.
166-
// This can lead to duplicates: e.g.
167-
// | EVAL x = y | STATS ... BY x, y
168-
// All substitutions happen before; groupings must be attributes at this point except for non-evaluatable groupings which will be
169-
// an alias like `c = CATEGORIZE(attribute)`.
170-
// Therefore, it is correct to deduplicate based on simple equality (based on names) instead of name ids (Set vs. AttributeSet).
171-
// TODO: The deduplication based on simple equality will be insufficient in case of multiple non-evaluatable groupings, e.g. for
172-
// `| EVAL x = y | STATS ... BY CATEGORIZE(x), CATEGORIZE(y)`. That will require semantic equality instead.
173-
LinkedHashSet<NamedExpression> resolvedGroupings = new LinkedHashSet<>();
174-
for (NamedExpression ne : upperGroupings) {
175-
NamedExpression transformed = (NamedExpression) ne.transformUp(Attribute.class, a -> aliases.resolve(a, a));
176-
resolvedGroupings.add(transformed);
177-
}
178-
return new ArrayList<>(resolvedGroupings);
179-
}
180-
181219
/**
182220
* Replace grouping alias previously contained in the aggregations that might have been projected away.
183221
*/

0 commit comments

Comments
 (0)