From 8450195f2124146b2f47cb91df4a7521b30f7f9f Mon Sep 17 00:00:00 2001 From: Bogdan Pintea Date: Fri, 11 Apr 2025 16:19:54 +0200 Subject: [PATCH] ESQL: Split grouping functions based on their EVAL-ability (#126597) This splits the grouping functions in two: those that can be evaluated independently through the EVAL operator (`BUCKET`) and those that don't (like those that that are evaluated through an agg operator, `CATEGORIZE`). Closes #124608 (cherry picked from commit 9784e0ec5f57b1eb15c846b499680dd1f735954c) --- .../functions/description/categorize.asciidoc | 6 +- .../src/main/resources/categorize.csv-spec | 15 ++ .../expression/function/grouping/Bucket.java | 5 +- .../function/grouping/Categorize.java | 14 +- .../function/grouping/GroupingFunction.java | 33 ++++- .../rules/logical/CombineProjections.java | 15 +- .../optimizer/rules/logical/FoldNull.java | 6 +- ...ReplaceAggregateAggExpressionWithEval.java | 31 ++-- ...laceAggregateNestedExpressionWithEval.java | 137 +++++++++++------- .../optimizer/LogicalPlanOptimizerTests.java | 14 +- 10 files changed, 170 insertions(+), 106 deletions(-) diff --git a/docs/reference/esql/functions/description/categorize.asciidoc b/docs/reference/esql/functions/description/categorize.asciidoc index c956066ad53f3..98f1162e95c32 100644 --- a/docs/reference/esql/functions/description/categorize.asciidoc +++ b/docs/reference/esql/functions/description/categorize.asciidoc @@ -6,6 +6,6 @@ Groups text messages into categories of similarly formatted text values. `CATEGORIZE` has the following limitations: -* can't be used within other expressions -* can't be used with multiple groupings -* can't be used or referenced within aggregate functions +* can’t be used within other expressions +* can’t be used more than once in the groupings +* can’t be used or referenced within aggregate functions and it has to be the first grouping diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/categorize.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/categorize.csv-spec index 5ad62dd7a21a8..aef412975fa89 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/categorize.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/categorize.csv-spec @@ -584,6 +584,21 @@ c:long 3 ; +reuse categorize arg expression in agg +required_capability: categorize_v5 + +FROM sample_data +| STATS m = MAX(LENGTH(CONCAT(message, "_end"))) BY c = CATEGORIZE(CONCAT(message, "_end")) +| SORT m +; + +m:integer |c:keyword +16 |.*?Disconnected_end.*? +20 |.*?Connection.+?error_end.*? +25 |.*?Connected.+?to.*? +; + + categorize in aggs inside function required_capability: categorize_v5 diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/Bucket.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/Bucket.java index 90b35e469ddce..137f60fd5ffd6 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/Bucket.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/Bucket.java @@ -57,7 +57,10 @@ * from a number of desired buckets (as a hint) and a range (auto mode). * In the former case, two parameters will be provided, in the latter four. */ -public class Bucket extends GroupingFunction implements PostOptimizationVerificationAware, TwoOptionalArguments { +public class Bucket extends GroupingFunction.EvaluatableGroupingFunction + implements + PostOptimizationVerificationAware, + TwoOptionalArguments { public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(Expression.class, "Bucket", Bucket::new); // TODO maybe we should just cover the whole of representable dates here - like ten years, 100 years, 1000 years, all the way up. diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/Categorize.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/Categorize.java index 570ce7a96dd68..abef029ee11cb 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/Categorize.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/Categorize.java @@ -10,7 +10,6 @@ import org.elasticsearch.common.io.stream.NamedWriteableRegistry; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; -import org.elasticsearch.compute.operator.EvalOperator.ExpressionEvaluator; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.Nullability; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; @@ -36,7 +35,7 @@ * For the implementation, see {@link org.elasticsearch.compute.aggregation.blockhash.CategorizeBlockHash} *

*/ -public class Categorize extends GroupingFunction { +public class Categorize extends GroupingFunction.NonEvaluatableGroupingFunction { public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry( Expression.class, "Categorize", @@ -51,9 +50,9 @@ public class Categorize extends GroupingFunction { detailedDescription = """ `CATEGORIZE` has the following limitations: - * can't be used within other expressions - * can't be used with multiple groupings - * can't be used or referenced within aggregate functions""", + * can’t be used within other expressions + * can’t be used more than once in the groupings + * can’t be used or referenced within aggregate functions and it has to be the first grouping""", examples = { @Example( file = "docs", @@ -99,11 +98,6 @@ public Nullability nullable() { return Nullability.TRUE; } - @Override - public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) { - throw new UnsupportedOperationException("CATEGORIZE is only evaluated during aggregations"); - } - @Override protected TypeResolution resolveType() { return isString(field(), sourceText(), DEFAULT); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/GroupingFunction.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/GroupingFunction.java index fd025e5e67a7c..5a1c826578771 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/GroupingFunction.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/GroupingFunction.java @@ -22,17 +22,13 @@ import static org.elasticsearch.xpack.esql.common.Failure.fail; -public abstract class GroupingFunction extends Function implements EvaluatorMapper, PostAnalysisPlanVerificationAware { +public abstract sealed class GroupingFunction extends Function implements PostAnalysisPlanVerificationAware permits + GroupingFunction.NonEvaluatableGroupingFunction, GroupingFunction.EvaluatableGroupingFunction { protected GroupingFunction(Source source, List fields) { super(source, fields); } - @Override - public Object fold(FoldContext ctx) { - return EvaluatorMapper.super.fold(source(), ctx); - } - @Override public BiConsumer postAnalysisPlanVerification() { return (p, failures) -> { @@ -45,4 +41,29 @@ public BiConsumer postAnalysisPlanVerification() { }; } + /** + * This is a class of grouping functions that cannot be evaluated outside the context of an aggregation. + * They will have their evaluation implemented part of an aggregation, which may keep state for their execution, making them "stateful" + * grouping functions. + */ + public abstract static non-sealed class NonEvaluatableGroupingFunction extends GroupingFunction { + protected NonEvaluatableGroupingFunction(Source source, List fields) { + super(source, fields); + } + } + + /** + * This is a class of grouping functions that can be evaluated independently within an EVAL operator, independent of the aggregation + * they're used by. + */ + public abstract static non-sealed class EvaluatableGroupingFunction extends GroupingFunction implements EvaluatorMapper { + protected EvaluatableGroupingFunction(Source source, List fields) { + super(source, fields); + } + + @Override + public Object fold(FoldContext ctx) { + return EvaluatorMapper.super.fold(source(), ctx); + } + } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/CombineProjections.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/CombineProjections.java index 0371510d6f306..a4ec92207444f 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/CombineProjections.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/CombineProjections.java @@ -15,7 +15,7 @@ import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.Expressions; import org.elasticsearch.xpack.esql.core.expression.NamedExpression; -import org.elasticsearch.xpack.esql.expression.function.grouping.Categorize; +import org.elasticsearch.xpack.esql.expression.function.grouping.GroupingFunction; import org.elasticsearch.xpack.esql.plan.logical.Aggregate; import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; import org.elasticsearch.xpack.esql.plan.logical.Project; @@ -67,11 +67,11 @@ protected LogicalPlan rule(UnaryPlan plan) { for (Expression grouping : groupings) { if (grouping instanceof Attribute attribute) { groupingAttrs.add(attribute); - } else if (grouping instanceof Alias as && as.child() instanceof Categorize) { + } else if (grouping instanceof Alias as && as.child() instanceof GroupingFunction.NonEvaluatableGroupingFunction) { groupingAttrs.add(as); } else { // After applying ReplaceAggregateNestedExpressionWithEval, - // groupings (except Categorize) can only contain attributes. + // evaluatable groupings can only contain attributes. throw new EsqlIllegalArgumentException("Expected an Attribute, got {}", grouping); } } @@ -147,7 +147,8 @@ private static List combineUpperGroupingsAndLowerProjections( List lowerProjections ) { assert upperGroupings.size() <= 1 - || upperGroupings.stream().anyMatch(group -> group.anyMatch(expr -> expr instanceof Categorize)) == false + || upperGroupings.stream() + .anyMatch(group -> group.anyMatch(expr -> expr instanceof GroupingFunction.NonEvaluatableGroupingFunction)) == false : "CombineProjections only tested with a single CATEGORIZE with no additional groups"; // Collect the alias map for resolving the source (f1 = 1, f2 = f1, etc..) AttributeMap.Builder aliasesBuilder = AttributeMap.builder(); @@ -161,10 +162,10 @@ private static List combineUpperGroupingsAndLowerProjections( // Propagate any renames from the lower projection into the upper groupings. // This can lead to duplicates: e.g. // | EVAL x = y | STATS ... BY x, y - // All substitutions happen before; groupings must be attributes at this point except for CATEGORIZE which will be an alias like - // `c = CATEGORIZE(attribute)`. + // All substitutions happen before; groupings must be attributes at this point except for non-evaluatable groupings which will be + // an alias like `c = CATEGORIZE(attribute)`. // Therefore, it is correct to deduplicate based on simple equality (based on names) instead of name ids (Set vs. AttributeSet). - // TODO: The deduplication based on simple equality will be insufficient in case of multiple CATEGORIZEs, e.g. for + // TODO: The deduplication based on simple equality will be insufficient in case of multiple non-evaluatable groupings, e.g. for // `| EVAL x = y | STATS ... BY CATEGORIZE(x), CATEGORIZE(y)`. That will require semantic equality instead. LinkedHashSet resolvedGroupings = new LinkedHashSet<>(); for (NamedExpression ne : upperGroupings) { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/FoldNull.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/FoldNull.java index cf4c7f19baafe..d60cb130f37b7 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/FoldNull.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/FoldNull.java @@ -13,7 +13,7 @@ import org.elasticsearch.xpack.esql.core.expression.Literal; import org.elasticsearch.xpack.esql.core.expression.Nullability; import org.elasticsearch.xpack.esql.expression.function.aggregate.AggregateFunction; -import org.elasticsearch.xpack.esql.expression.function.grouping.Categorize; +import org.elasticsearch.xpack.esql.expression.function.grouping.GroupingFunction; import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.In; import org.elasticsearch.xpack.esql.optimizer.LogicalOptimizerContext; @@ -43,9 +43,9 @@ public Expression rule(Expression e, LogicalOptimizerContext ctx) { return Literal.of(in, null); } } else if (e instanceof Alias == false && e.nullable() == Nullability.TRUE - // Categorize function stays as a STATS grouping (It isn't moved to an early EVAL like other groupings), + // Non-evaluatable functions stay as a STATS grouping (It isn't moved to an early EVAL like other groupings), // so folding it to null would currently break the plan, as we don't create an attribute/channel for that null value. - && e instanceof Categorize == false + && e instanceof GroupingFunction.NonEvaluatableGroupingFunction == false && Expressions.anyMatch(e.children(), Expressions::isGuaranteedNull)) { return Literal.of(e, null); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceAggregateAggExpressionWithEval.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceAggregateAggExpressionWithEval.java index 46e47d097d3fd..075d8676abde4 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceAggregateAggExpressionWithEval.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceAggregateAggExpressionWithEval.java @@ -16,7 +16,7 @@ import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.util.Holder; import org.elasticsearch.xpack.esql.expression.function.aggregate.AggregateFunction; -import org.elasticsearch.xpack.esql.expression.function.grouping.Categorize; +import org.elasticsearch.xpack.esql.expression.function.grouping.GroupingFunction; import org.elasticsearch.xpack.esql.plan.logical.Aggregate; import org.elasticsearch.xpack.esql.plan.logical.Eval; import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; @@ -50,20 +50,20 @@ public ReplaceAggregateAggExpressionWithEval() { @Override protected LogicalPlan rule(Aggregate aggregate) { - // build alias map + // an alias map for evaluatable grouping functions AttributeMap.Builder aliasesBuilder = AttributeMap.builder(); - aggregate.forEachExpressionUp(Alias.class, a -> aliasesBuilder.put(a.toAttribute(), a.child())); - var aliases = aliasesBuilder.build(); - - // Build Categorize grouping functions map. - // Functions like BUCKET() shouldn't reach this point, - // as they are moved to an early EVAL by ReplaceAggregateNestedExpressionWithEval - Map groupingAttributes = new HashMap<>(); + // a function map for non-evaluatable grouping functions + Map nonEvalGroupingAttributes = new HashMap<>( + aggregate.groupings().size() + ); aggregate.forEachExpressionUp(Alias.class, a -> { - if (a.child() instanceof Categorize groupingFunction) { - groupingAttributes.put(groupingFunction, a.toAttribute()); + if (a.child() instanceof GroupingFunction.NonEvaluatableGroupingFunction groupingFunction) { + nonEvalGroupingAttributes.put(groupingFunction, a.toAttribute()); + } else { + aliasesBuilder.put(a.toAttribute(), a.child()); } }); + var aliases = aliasesBuilder.build(); // break down each aggregate into AggregateFunction and/or grouping key // preserve the projection at the end @@ -123,8 +123,11 @@ protected LogicalPlan rule(Aggregate aggregate) { return alias.toAttribute(); }); - // replace grouping functions with their references - aggExpression = aggExpression.transformUp(Categorize.class, groupingAttributes::get); + // replace non-evaluatable grouping functions with their references + aggExpression = aggExpression.transformUp( + GroupingFunction.NonEvaluatableGroupingFunction.class, + nonEvalGroupingAttributes::get + ); Alias alias = as.replaceChild(aggExpression); newEvals.add(alias); @@ -152,7 +155,7 @@ protected LogicalPlan rule(Aggregate aggregate) { return plan; } - static String syntheticName(Expression expression, Expression af, int counter) { + private static String syntheticName(Expression expression, Expression af, int counter) { return TemporaryNameUtils.temporaryName(expression, af, counter); } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceAggregateNestedExpressionWithEval.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceAggregateNestedExpressionWithEval.java index 4dbc43454a023..dd7ee26aa84bd 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceAggregateNestedExpressionWithEval.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceAggregateNestedExpressionWithEval.java @@ -13,7 +13,6 @@ import org.elasticsearch.xpack.esql.core.expression.NamedExpression; import org.elasticsearch.xpack.esql.core.util.Holder; import org.elasticsearch.xpack.esql.expression.function.aggregate.AggregateFunction; -import org.elasticsearch.xpack.esql.expression.function.grouping.Categorize; import org.elasticsearch.xpack.esql.expression.function.grouping.GroupingFunction; import org.elasticsearch.xpack.esql.plan.logical.Aggregate; import org.elasticsearch.xpack.esql.plan.logical.Eval; @@ -44,30 +43,27 @@ protected LogicalPlan rule(Aggregate aggregate) { List newGroupings = new ArrayList<>(aggregate.groupings()); boolean groupingChanged = false; - // start with the groupings since the aggs might duplicate it + // start with the groupings since the aggs might reuse/reference them for (int i = 0, s = newGroupings.size(); i < s; i++) { Expression g = newGroupings.get(i); - // Move the alias into an eval and replace it with its attribute. - // Exception: Categorize is internal to the aggregation and remains in the groupings. We move its child expression into an eval. if (g instanceof Alias as) { - if (as.child() instanceof Categorize cat) { - // For Categorize grouping function, we only move the child expression into an eval - if (cat.field() instanceof Attribute == false) { + Expression asChild = as.child(); + // for non-evaluable grouping functions, replace their nested expressions with attributes and extract the expression out + // into an eval (added later below) + if (asChild instanceof GroupingFunction.NonEvaluatableGroupingFunction gf) { + Expression newGroupingFunction = transformNonEvaluatableGroupingFunction(gf, evals); + if (newGroupingFunction != gf) { groupingChanged = true; - var fieldAs = new Alias(as.source(), as.name(), cat.field(), null, true); - var fieldAttr = fieldAs.toAttribute(); - evals.add(fieldAs); - evalNames.put(fieldAs.name(), fieldAttr); - Categorize replacement = cat.replaceChildren(List.of(fieldAttr)); - newGroupings.set(i, as.replaceChild(replacement)); + newGroupings.set(i, as.replaceChild(newGroupingFunction)); } } else { + // Move the alias into an eval and replace it with its attribute. groupingChanged = true; var attr = as.toAttribute(); evals.add(as); evalNames.put(as.name(), attr); newGroupings.set(i, attr); - if (as.child() instanceof GroupingFunction gf) { + if (asChild instanceof GroupingFunction.EvaluatableGroupingFunction gf) { groupingAttributes.put(gf, attr); } } @@ -91,17 +87,7 @@ protected LogicalPlan rule(Aggregate aggregate) { // if the child is a nested expression Expression child = as.child(); - // do not replace nested aggregates - if (child instanceof AggregateFunction af) { - Holder foundNestedAggs = new Holder<>(Boolean.FALSE); - af.children().forEach(e -> e.forEachDown(AggregateFunction.class, unused -> foundNestedAggs.set(Boolean.TRUE))); - if (foundNestedAggs.get()) { - return as; - } - } - - // shortcut for common scenario - if (child instanceof AggregateFunction af && af.field() instanceof Attribute) { + if (child instanceof AggregateFunction af && skipOptimisingAgg(af)) { return as; } @@ -112,33 +98,13 @@ protected LogicalPlan rule(Aggregate aggregate) { return ref; } - // 1. look for the aggregate function - var replaced = child.transformUp(AggregateFunction.class, af -> { - Expression result = af; - - Expression field = af.field(); - // 2. if the field is a nested expression (not attribute or literal), replace it - if (field instanceof Attribute == false && field.foldable() == false) { - // 3. create a new alias if one doesn't exist yet no reference - Attribute attr = expToAttribute.computeIfAbsent(field.canonical(), k -> { - Alias newAlias = new Alias(k.source(), syntheticName(k, af, counter[0]++), k, null, true); - evals.add(newAlias); - return newAlias.toAttribute(); - }); - aggsChanged.set(true); - // replace field with attribute - List newChildren = new ArrayList<>(af.children()); - newChildren.set(0, attr); - result = af.replaceChildren(newChildren); - } - return result; - }); - // replace any grouping functions with their references pointing to the added synthetic eval - replaced = replaced.transformDown(GroupingFunction.class, gf -> { - // Categorize in aggs depends on the grouping result, not on an early eval - if (gf instanceof Categorize) { - return gf; - } + // look for the aggregate function + var replaced = child.transformUp( + AggregateFunction.class, + af -> transformAggregateFunction(af, expToAttribute, evals, counter, aggsChanged) + ); + // replace any evaluatable grouping functions with their references pointing to the added synthetic eval + replaced = replaced.transformDown(GroupingFunction.EvaluatableGroupingFunction.class, gf -> { aggsChanged.set(true); // should never return null, as it's verified. // but even if broken, the transform will fail safely; otoh, returning `gf` will fail later due to incorrect plan. @@ -159,10 +125,71 @@ protected LogicalPlan rule(Aggregate aggregate) { aggregate = aggregate.with(newEval, groupings, aggregates); } - return (LogicalPlan) aggregate; + return aggregate; + } + + private static Expression transformNonEvaluatableGroupingFunction( + GroupingFunction.NonEvaluatableGroupingFunction gf, + List evals + ) { + int counter = 0; + boolean childrenChanged = false; + List newChildren = new ArrayList<>(gf.children().size()); + + for (Expression ex : gf.children()) { + if (ex instanceof Attribute == false) { // TODO: foldables shouldn't require eval'ing either + var alias = new Alias(ex.source(), syntheticName(ex, gf, counter++), ex, null, true); + evals.add(alias); + newChildren.add(alias.toAttribute()); + childrenChanged = true; + } else { + newChildren.add(ex); + } + } + + return childrenChanged ? gf.replaceChildren(newChildren) : gf; + } + + private static boolean skipOptimisingAgg(AggregateFunction af) { + // shortcut for the common scenario + if (af.field() instanceof Attribute) { + return true; + } + + // do not replace nested aggregates + Holder foundNestedAggs = new Holder<>(Boolean.FALSE); + af.field().forEachDown(AggregateFunction.class, unused -> foundNestedAggs.set(Boolean.TRUE)); + return foundNestedAggs.get(); + } + + private static Expression transformAggregateFunction( + AggregateFunction af, + Map expToAttribute, + List evals, + int[] counter, + Holder aggsChanged + ) { + Expression result = af; + + Expression field = af.field(); + // if the field is a nested expression (not attribute or literal), replace it + if (field instanceof Attribute == false && field.foldable() == false) { + // create a new alias if one doesn't exist yet + Attribute attr = expToAttribute.computeIfAbsent(field.canonical(), k -> { + Alias newAlias = new Alias(k.source(), syntheticName(k, af, counter[0]++), k, null, true); + evals.add(newAlias); + return newAlias.toAttribute(); + }); + aggsChanged.set(true); + // replace field with attribute + List newChildren = new ArrayList<>(af.children()); + newChildren.set(0, attr); + result = af.replaceChildren(newChildren); + } + return result; } - static String syntheticName(Expression expression, AggregateFunction af, int counter) { - return TemporaryNameUtils.temporaryName(expression, af, counter); + private static String syntheticName(Expression expression, Expression func, int counter) { + return TemporaryNameUtils.temporaryName(expression, func, counter); } } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java index 45f2f2801df20..987251c0878a3 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java @@ -4320,11 +4320,11 @@ public void testNestedExpressionsInGroups() { /** * Expects - * Limit[1000[INTEGER]] - * \_Aggregate[STANDARD,[CATEGORIZE(CATEGORIZE(CONCAT(first_name, "abc")){r$}#18) AS CATEGORIZE(CONCAT(first_name, "abc"))],[CO - * UNT(salary{f}#13,true[BOOLEAN]) AS c, CATEGORIZE(CONCAT(first_name, "abc")){r}#3]] - * \_Eval[[CONCAT(first_name{f}#9,[61 62 63][KEYWORD]) AS CATEGORIZE(CONCAT(first_name, "abc"))]] - * \_EsRelation[test][_meta_field{f}#14, emp_no{f}#8, first_name{f}#9, ge..] + * Limit[1000[INTEGER],false] + * \_Aggregate[[CATEGORIZE($$CONCAT(first_na>$CATEGORIZE(CONC>$0{r$}#1590) AS CATEGORIZE(CONCAT(first_name, "abc"))],[COUNT(sa + * lary{f}#1584,true[BOOLEAN]) AS c, CATEGORIZE(CONCAT(first_name, "abc")){r}#1574]] + * \_Eval[[CONCAT(first_name{f}#1580,[61 62 63][KEYWORD]) AS $$CONCAT(first_na>$CATEGORIZE(CONC>$0]] + * \_EsRelation[test][_meta_field{f}#1585, emp_no{f}#1579, first_name{f}#..] */ public void testNestedExpressionsInGroupsWithCategorize() { var plan = optimizedPlan(""" @@ -4345,10 +4345,10 @@ public void testNestedExpressionsInGroupsWithCategorize() { var evalFieldAlias = as(eval.fields().get(0), Alias.class); var evalField = as(evalFieldAlias.child(), Concat.class); - assertThat(evalFieldAlias.name(), is("CATEGORIZE(CONCAT(first_name, \"abc\"))")); + assertThat(evalFieldAlias.name(), is("$$CONCAT(first_na>$CATEGORIZE(CONC>$0")); assertThat(categorize.field(), is(evalFieldAlias.toAttribute())); assertThat(evalField.source().text(), is("CONCAT(first_name, \"abc\")")); - assertThat(categorizeAlias.source(), is(evalFieldAlias.source())); + assertThat(categorizeAlias.source(), is(categorize.source())); } /**