| 
22 | 22 | import org.elasticsearch.xpack.esql.plan.logical.UnaryPlan;  | 
23 | 23 | 
 
  | 
24 | 24 | import java.util.ArrayList;  | 
 | 25 | +import java.util.LinkedHashSet;  | 
25 | 26 | import java.util.List;  | 
26 | 27 | 
 
  | 
27 | 28 | public final class CombineProjections extends OptimizerRules.OptimizerRule<UnaryPlan> {  | 
@@ -144,30 +145,31 @@ private static List<Expression> combineUpperGroupingsAndLowerProjections(  | 
144 | 145 |         List<? extends NamedExpression> upperGroupings,  | 
145 | 146 |         List<? extends NamedExpression> lowerProjections  | 
146 | 147 |     ) {  | 
 | 148 | +        assert upperGroupings.size() <= 1  | 
 | 149 | +            || upperGroupings.stream().anyMatch(group -> group.anyMatch(expr -> expr instanceof Categorize)) == false  | 
 | 150 | +            : "CombineProjections only tested with a single CATEGORIZE with no additional groups";  | 
147 | 151 |         // Collect the alias map for resolving the source (f1 = 1, f2 = f1, etc..)  | 
148 |  | -        AttributeMap<Expression> aliases = new AttributeMap<>();  | 
 | 152 | +        AttributeMap<Attribute> aliases = new AttributeMap<>();  | 
149 | 153 |         for (NamedExpression ne : lowerProjections) {  | 
150 |  | -            // record the alias  | 
151 |  | -            aliases.put(ne.toAttribute(), Alias.unwrap(ne));  | 
 | 154 | +            // Record the aliases.  | 
 | 155 | +            // Projections are just aliases for attributes, so casting is safe.  | 
 | 156 | +            aliases.put(ne.toAttribute(), (Attribute) Alias.unwrap(ne));  | 
152 | 157 |         }  | 
153 |  | -        // Replace any matching attribute directly with the aliased attribute from the projection.  | 
154 |  | -        AttributeSet seen = new AttributeSet();  | 
155 |  | -        List<Expression> replaced = new ArrayList<>();  | 
 | 158 | + | 
 | 159 | +        // Propagate any renames from the lower projection into the upper groupings.  | 
 | 160 | +        // This can lead to duplicates: e.g.  | 
 | 161 | +        // | EVAL x = y | STATS ... BY x, y  | 
 | 162 | +        // All substitutions happen before; groupings must be attributes at this point except for CATEGORIZE which will be an alias like  | 
 | 163 | +        // `c = CATEGORIZE(attribute)`.  | 
 | 164 | +        // Therefore, it is correct to deduplicate based on simple equality (based on names) instead of name ids (Set vs. AttributeSet).  | 
 | 165 | +        // TODO: The deduplication based on simple equality will be insufficient in case of multiple CATEGORIZEs, e.g. for  | 
 | 166 | +        // `| EVAL x = y | STATS ... BY CATEGORIZE(x), CATEGORIZE(y)`. That will require semantic equality instead.  | 
 | 167 | +        LinkedHashSet<NamedExpression> resolvedGroupings = new LinkedHashSet<>();  | 
156 | 168 |         for (NamedExpression ne : upperGroupings) {  | 
157 |  | -            // Duplicated attributes are ignored.  | 
158 |  | -            if (ne instanceof Attribute attribute) {  | 
159 |  | -                var newExpression = aliases.resolve(attribute, attribute);  | 
160 |  | -                if (newExpression instanceof Attribute newAttribute && seen.add(newAttribute) == false) {  | 
161 |  | -                    // Already seen, skip  | 
162 |  | -                    continue;  | 
163 |  | -                }  | 
164 |  | -                replaced.add(newExpression);  | 
165 |  | -            } else {  | 
166 |  | -                // For grouping functions, this will replace nested properties too  | 
167 |  | -                replaced.add(ne.transformUp(Attribute.class, a -> aliases.resolve(a, a)));  | 
168 |  | -            }  | 
 | 169 | +            NamedExpression transformed = (NamedExpression) ne.transformUp(Attribute.class, a -> aliases.resolve(a, a));  | 
 | 170 | +            resolvedGroupings.add(transformed);  | 
169 | 171 |         }  | 
170 |  | -        return replaced;  | 
 | 172 | +        return new ArrayList<>(resolvedGroupings);  | 
171 | 173 |     }  | 
172 | 174 | 
 
  | 
173 | 175 |     /**  | 
 | 
0 commit comments