Skip to content

Commit cca7051

Browse files
authored
ESQL: Simplify CombineProjections (#117882)
Make combineUpperGroupingsAndLowerProjections a bit simpler. Also slightly improve a test and add comments to provide more context.
1 parent cab6dc5 commit cca7051

File tree

2 files changed

+22
-20
lines changed

2 files changed

+22
-20
lines changed

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/CombineProjections.java

Lines changed: 21 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import org.elasticsearch.xpack.esql.plan.logical.UnaryPlan;
2323

2424
import java.util.ArrayList;
25+
import java.util.LinkedHashSet;
2526
import java.util.List;
2627

2728
public final class CombineProjections extends OptimizerRules.OptimizerRule<UnaryPlan> {
@@ -144,30 +145,31 @@ private static List<Expression> combineUpperGroupingsAndLowerProjections(
144145
List<? extends NamedExpression> upperGroupings,
145146
List<? extends NamedExpression> lowerProjections
146147
) {
148+
assert upperGroupings.size() <= 1
149+
|| upperGroupings.stream().anyMatch(group -> group.anyMatch(expr -> expr instanceof Categorize)) == false
150+
: "CombineProjections only tested with a single CATEGORIZE with no additional groups";
147151
// Collect the alias map for resolving the source (f1 = 1, f2 = f1, etc..)
148-
AttributeMap<Expression> aliases = new AttributeMap<>();
152+
AttributeMap<Attribute> aliases = new AttributeMap<>();
149153
for (NamedExpression ne : lowerProjections) {
150-
// record the alias
151-
aliases.put(ne.toAttribute(), Alias.unwrap(ne));
154+
// Record the aliases.
155+
// Projections are just aliases for attributes, so casting is safe.
156+
aliases.put(ne.toAttribute(), (Attribute) Alias.unwrap(ne));
152157
}
153-
// Replace any matching attribute directly with the aliased attribute from the projection.
154-
AttributeSet seen = new AttributeSet();
155-
List<Expression> replaced = new ArrayList<>();
158+
159+
// Propagate any renames from the lower projection into the upper groupings.
160+
// This can lead to duplicates: e.g.
161+
// | EVAL x = y | STATS ... BY x, y
162+
// All substitutions happen before; groupings must be attributes at this point except for CATEGORIZE which will be an alias like
163+
// `c = CATEGORIZE(attribute)`.
164+
// Therefore, it is correct to deduplicate based on simple equality (based on names) instead of name ids (Set vs. AttributeSet).
165+
// TODO: The deduplication based on simple equality will be insufficient in case of multiple CATEGORIZEs, e.g. for
166+
// `| EVAL x = y | STATS ... BY CATEGORIZE(x), CATEGORIZE(y)`. That will require semantic equality instead.
167+
LinkedHashSet<NamedExpression> resolvedGroupings = new LinkedHashSet<>();
156168
for (NamedExpression ne : upperGroupings) {
157-
// Duplicated attributes are ignored.
158-
if (ne instanceof Attribute attribute) {
159-
var newExpression = aliases.resolve(attribute, attribute);
160-
if (newExpression instanceof Attribute newAttribute && seen.add(newAttribute) == false) {
161-
// Already seen, skip
162-
continue;
163-
}
164-
replaced.add(newExpression);
165-
} else {
166-
// For grouping functions, this will replace nested properties too
167-
replaced.add(ne.transformUp(Attribute.class, a -> aliases.resolve(a, a)));
168-
}
169+
NamedExpression transformed = (NamedExpression) ne.transformUp(Attribute.class, a -> aliases.resolve(a, a));
170+
resolvedGroupings.add(transformed);
169171
}
170-
return replaced;
172+
return new ArrayList<>(resolvedGroupings);
171173
}
172174

173175
/**

x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1217,7 +1217,7 @@ public void testCombineProjectionWithCategorizeGrouping() {
12171217
var plan = plan("""
12181218
from test
12191219
| eval k = first_name, k1 = k
1220-
| stats s = sum(salary) by cat = CATEGORIZE(k)
1220+
| stats s = sum(salary) by cat = CATEGORIZE(k1)
12211221
| keep s, cat
12221222
""");
12231223

0 commit comments

Comments
 (0)