1818import org .elasticsearch .xpack .esql .core .expression .ReferenceAttribute ;
1919import org .elasticsearch .xpack .esql .expression .function .grouping .GroupingFunction ;
2020import org .elasticsearch .xpack .esql .plan .logical .Aggregate ;
21+ import org .elasticsearch .xpack .esql .plan .logical .Eval ;
2122import org .elasticsearch .xpack .esql .plan .logical .LogicalPlan ;
2223import org .elasticsearch .xpack .esql .plan .logical .Project ;
2324import org .elasticsearch .xpack .esql .plan .logical .UnaryPlan ;
2425
2526import java .util .ArrayList ;
27+ import java .util .HashSet ;
2628import java .util .LinkedHashSet ;
2729import java .util .List ;
2830
@@ -57,9 +59,7 @@ protected LogicalPlan rule(UnaryPlan plan) {
5759 // project can be fully removed
5860 if (newAggs != null ) {
5961 var newGroups = replacePrunedAliasesUsedInGroupBy (a .groupings (), aggs , newAggs );
60- if (local == false || newGroups .size () == a .groupings ().size ()) {
61- plan = a .with (newGroups , newAggs );
62- }
62+ plan = a .with (newGroups , newAggs );
6363 }
6464 }
6565 return plan ;
@@ -68,21 +68,92 @@ protected LogicalPlan rule(UnaryPlan plan) {
6868 // Agg with underlying Project (group by on sub-queries)
6969 if (plan instanceof Aggregate a && child instanceof Project p ) {
7070 var groupings = a .groupings ();
71- List <NamedExpression > groupingAttrs = new ArrayList <>(a .groupings ().size ());
71+
72+ // sanity checks
7273 for (Expression grouping : groupings ) {
73- if (grouping instanceof Attribute attribute ) {
74- groupingAttrs .add (attribute );
75- } else if (grouping instanceof Alias as && as .child () instanceof GroupingFunction .NonEvaluatableGroupingFunction ) {
76- groupingAttrs .add (as );
77- } else {
74+ if ((grouping instanceof Attribute
75+ || grouping instanceof Alias as && as .child () instanceof GroupingFunction .NonEvaluatableGroupingFunction ) == false ) {
7876 // After applying ReplaceAggregateNestedExpressionWithEval,
7977 // evaluatable groupings can only contain attributes.
80- throw new EsqlIllegalArgumentException ("Expected an Attribute , got {}" , grouping );
78+ throw new EsqlIllegalArgumentException ("Expected an attribute or grouping function , got {}" , grouping );
8179 }
8280 }
83- List <Expression > newGroupings = combineUpperGroupingsAndLowerProjections (groupingAttrs , p .projections ());
84- if (local == false || newGroupings .size () == a .groupings ().size ()) {
81+ assert groupings .size () <= 1
82+ || groupings .stream ()
83+ .anyMatch (group -> group .anyMatch (expr -> expr instanceof GroupingFunction .NonEvaluatableGroupingFunction )) == false
84+ : "CombineProjections only tested with a single CATEGORIZE with no additional groups" ;
85+
86+ // Collect the alias map for resolving the source (f1 = 1, f2 = f1, etc..)
87+ AttributeMap .Builder <Attribute > aliasesBuilder = AttributeMap .builder ();
88+ for (NamedExpression ne : p .projections ()) {
89+ // Record the aliases.
90+ // Projections are just aliases for attributes, so casting is safe.
91+ aliasesBuilder .put (ne .toAttribute (), (Attribute ) Alias .unwrap (ne ));
92+ }
93+ var aliases = aliasesBuilder .build ();
94+
95+ // Propagate any renames from the lower projection into the upper groupings.
96+ List <Expression > resolvedGroupings = new ArrayList <>();
97+ for (Expression grouping : groupings ) {
98+ Expression transformed = grouping .transformUp (Attribute .class , as -> aliases .resolve (as , as ));
99+ resolvedGroupings .add (transformed );
100+ }
101+
102+ // This can lead to duplicates in the groupings: e.g.
103+ // | EVAL x = y | STATS ... BY x, y
104+ if (local == false ) {
105+ // On the coordinator, we can just discard the duplicates.
106+
107+ // All substitutions happen before; groupings must be attributes at this point except for non-evaluatable groupings which
108+ // will be an alias like `c = CATEGORIZE(attribute)`.
109+ // Due to such aliases, we can't use an AttributeSet to deduplicate. But we can use a regular set to deduplicate based on
110+ // regular equality (i.e. based on names) instead of name ids.
111+ // TODO: The deduplication based on simple equality will be insufficient in case of multiple non-evaluatable groupings, e.g.
112+ // for `| EVAL x = y | STATS ... BY CATEGORIZE(x), CATEGORIZE(y)`. That will require semantic equality instead. Also
113+ // applies in the local case below.
114+ LinkedHashSet <Expression > deduplicatedResolvedGroupings = new LinkedHashSet <>(resolvedGroupings );
115+ List <Expression > newGroupings = new ArrayList <>(deduplicatedResolvedGroupings );
85116 plan = a .with (p .child (), newGroupings , combineProjections (a .aggregates (), p .projections ()));
117+ } else {
118+ // On the data node, the groupings must be preserved because they affect the physical output (see
119+ // AbstractPhysicalOperationProviders#intermediateAttributes).
120+ // In case that propagating the lower projection leads to duplicates in the resolved groupings, we'll leave an Eval in place
121+ // of the original projection to create new attributes for the duplicate groups.
122+ HashSet <Expression > seenResolvedGroupings = new HashSet <>(resolvedGroupings .size ());
123+ List <Expression > newGroupings = new ArrayList <>();
124+ List <Alias > aliasesAgainstDuplication = new ArrayList <>();
125+
126+ for (int i = 0 ; i < groupings .size (); i ++) {
127+ Expression resolvedGrouping = resolvedGroupings .get (i );
128+ if (seenResolvedGroupings .add (resolvedGrouping )) {
129+ newGroupings .add (resolvedGrouping );
130+ } else {
131+ // resolving the renames leads to a duplicate here - we need to alias the underlying attribute this refers to.
132+ // should really only be 1 attribute, anyway, but going via .references() includes the case of a
133+ // GroupingFunction.NonEvaluatableGroupingFunction.
134+ Attribute coreAttribute = resolvedGrouping .references ().iterator ().next ();
135+
136+ Alias renameAgainstDuplication = new Alias (
137+ coreAttribute .source (),
138+ TemporaryNameUtils .locallyUniqueTemporaryName (coreAttribute .name ()),
139+ coreAttribute
140+ );
141+ aliasesAgainstDuplication .add (renameAgainstDuplication );
142+
143+ // propagate the new alias into the new grouping
144+ AttributeMap .Builder <Attribute > resolverBuilder = AttributeMap .builder ();
145+ resolverBuilder .put (coreAttribute , renameAgainstDuplication .toAttribute ());
146+ resolverBuilder .build ();
147+ AttributeMap <Attribute > resolver = resolverBuilder .build ();
148+
149+ newGroupings .add (resolvedGrouping .transformUp (Attribute .class , attr -> resolver .resolve (attr , attr )));
150+ }
151+ }
152+
153+ LogicalPlan newChild = aliasesAgainstDuplication .isEmpty ()
154+ ? p .child ()
155+ : new Eval (p .source (), p .child (), aliasesAgainstDuplication );
156+ plan = a .with (newChild , newGroupings , combineProjections (a .aggregates (), p .projections ()));
86157 }
87158 }
88159
@@ -145,39 +216,6 @@ private static List<NamedExpression> combineProjections(List<? extends NamedExpr
145216 return replaced ;
146217 }
147218
148- private static List <Expression > combineUpperGroupingsAndLowerProjections (
149- List <? extends NamedExpression > upperGroupings ,
150- List <? extends NamedExpression > lowerProjections
151- ) {
152- assert upperGroupings .size () <= 1
153- || upperGroupings .stream ()
154- .anyMatch (group -> group .anyMatch (expr -> expr instanceof GroupingFunction .NonEvaluatableGroupingFunction )) == false
155- : "CombineProjections only tested with a single CATEGORIZE with no additional groups" ;
156- // Collect the alias map for resolving the source (f1 = 1, f2 = f1, etc..)
157- AttributeMap .Builder <Attribute > aliasesBuilder = AttributeMap .builder ();
158- for (NamedExpression ne : lowerProjections ) {
159- // Record the aliases.
160- // Projections are just aliases for attributes, so casting is safe.
161- aliasesBuilder .put (ne .toAttribute (), (Attribute ) Alias .unwrap (ne ));
162- }
163- var aliases = aliasesBuilder .build ();
164-
165- // Propagate any renames from the lower projection into the upper groupings.
166- // This can lead to duplicates: e.g.
167- // | EVAL x = y | STATS ... BY x, y
168- // All substitutions happen before; groupings must be attributes at this point except for non-evaluatable groupings which will be
169- // an alias like `c = CATEGORIZE(attribute)`.
170- // Therefore, it is correct to deduplicate based on simple equality (based on names) instead of name ids (Set vs. AttributeSet).
171- // TODO: The deduplication based on simple equality will be insufficient in case of multiple non-evaluatable groupings, e.g. for
172- // `| EVAL x = y | STATS ... BY CATEGORIZE(x), CATEGORIZE(y)`. That will require semantic equality instead.
173- LinkedHashSet <NamedExpression > resolvedGroupings = new LinkedHashSet <>();
174- for (NamedExpression ne : upperGroupings ) {
175- NamedExpression transformed = (NamedExpression ) ne .transformUp (Attribute .class , a -> aliases .resolve (a , a ));
176- resolvedGroupings .add (transformed );
177- }
178- return new ArrayList <>(resolvedGroupings );
179- }
180-
181219 /**
182220 * Replace grouping alias previously contained in the aggregations that might have been projected away.
183221 */
0 commit comments