2727import java .util .HashSet ;
2828import java .util .LinkedHashSet ;
2929import java .util .List ;
30+ import java .util .Set ;
3031
3132public final class CombineProjections extends OptimizerRules .OptimizerRule <UnaryPlan > {
3233 // don't drop groupings from a local plan, as the layout has already been agreed upon
@@ -100,25 +101,12 @@ protected LogicalPlan rule(UnaryPlan plan) {
100101
101102 // This can lead to duplicates in the groupings: e.g.
102103 // | EVAL x = y | STATS ... BY x, y
103- if (local == false ) {
104- // On the coordinator, we can just discard the duplicates.
105-
106- // All substitutions happen before; groupings must be attributes at this point except for non-evaluatable groupings which
107- // will be an alias like `c = CATEGORIZE(attribute)`.
108- // Due to such aliases, we can't use an AttributeSet to deduplicate. But we can use a regular set to deduplicate based on
109- // regular equality (i.e. based on names) instead of name ids.
110- // TODO: The deduplication based on simple equality will be insufficient in case of multiple non-evaluatable groupings, e.g.
111- // for `| EVAL x = y | STATS ... BY CATEGORIZE(x), CATEGORIZE(y)`. That will require semantic equality instead. Also
112- // applies in the local case below.
113- LinkedHashSet <Expression > deduplicatedResolvedGroupings = new LinkedHashSet <>(resolvedGroupings );
114- List <Expression > newGroupings = new ArrayList <>(deduplicatedResolvedGroupings );
115- plan = a .with (p .child (), newGroupings , combineProjections (a .aggregates (), p .projections ()));
116- } else {
104+ if (local ) {
117105 // On the data node, the groupings must be preserved because they affect the physical output (see
118106 // AbstractPhysicalOperationProviders#intermediateAttributes).
119107 // In case that propagating the lower projection leads to duplicates in the resolved groupings, we'll leave an Eval in place
120108 // of the original projection to create new attributes for the duplicate groups.
121- HashSet <Expression > seenResolvedGroupings = new HashSet <>(resolvedGroupings .size ());
109+ Set <Expression > seenResolvedGroupings = new HashSet <>(resolvedGroupings .size ());
122110 List <Expression > newGroupings = new ArrayList <>();
123111 List <Alias > aliasesAgainstDuplication = new ArrayList <>();
124112
@@ -142,7 +130,6 @@ protected LogicalPlan rule(UnaryPlan plan) {
142130 // propagate the new alias into the new grouping
143131 AttributeMap .Builder <Attribute > resolverBuilder = AttributeMap .builder ();
144132 resolverBuilder .put (coreAttribute , renameAgainstDuplication .toAttribute ());
145- resolverBuilder .build ();
146133 AttributeMap <Attribute > resolver = resolverBuilder .build ();
147134
148135 newGroupings .add (resolvedGrouping .transformUp (Attribute .class , attr -> resolver .resolve (attr , attr )));
@@ -153,6 +140,17 @@ protected LogicalPlan rule(UnaryPlan plan) {
153140 ? p .child ()
154141 : new Eval (p .source (), p .child (), aliasesAgainstDuplication );
155142 plan = a .with (newChild , newGroupings , combineProjections (a .aggregates (), p .projections ()));
143+ } else {
144+ // On the coordinator, we can just discard the duplicates.
145+ // All substitutions happen before; groupings must be attributes at this point except for non-evaluatable groupings which
146+ // will be an alias like `c = CATEGORIZE(attribute)`.
147+ // Due to such aliases, we can't use an AttributeSet to deduplicate. But we can use a regular set to deduplicate based on
148+ // regular equality (i.e. based on names) instead of name ids.
149+ // TODO: The deduplication based on simple equality will be insufficient in case of multiple non-evaluatable groupings, e.g.
150+ // for `| EVAL x = y | STATS ... BY CATEGORIZE(x), CATEGORIZE(y)`. That will require semantic equality instead. Also
151+ // applies in the local case below.
152+ List <Expression > newGroupings = new ArrayList <>(new LinkedHashSet <>(resolvedGroupings ));
153+ plan = a .with (p .child (), newGroupings , combineProjections (a .aggregates (), p .projections ()));
156154 }
157155 }
158156
0 commit comments