Skip to content

Commit 8b91fbc

Browse files
committed
Split grouping functions based on their EVAL-ability
This splits the grouping functions in two groups: those that can be evaluated independently through the EVAL operator (BUCKET) and those that don't (like those that that are evaluated through an agg operator, CATEGORIZE).
1 parent edfb17e commit 8b91fbc

File tree

11 files changed

+157
-101
lines changed

11 files changed

+157
-101
lines changed

docs/reference/query-languages/esql/_snippets/functions/description/categorize.md

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/reference/query-languages/esql/_snippets/functions/examples/bucket.md

Lines changed: 1 addition & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

x-pack/plugin/esql/qa/testFixtures/src/main/resources/categorize.csv-spec

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -584,6 +584,21 @@ c:long
584584
3
585585
;
586586

587+
reuse categorize arg expression in agg
588+
required_capability: categorize_v5
589+
590+
FROM sample_data
591+
| STATS m = MAX(LENGTH(CONCAT(message, "_end"))) BY c = CATEGORIZE(CONCAT(message, "_end"))
592+
| SORT m
593+
;
594+
595+
m:integer |c:keyword
596+
16 |.*?Disconnected_end.*?
597+
20 |.*?Connection.+?error_end.*?
598+
25 |.*?Connected.+?to.*?
599+
;
600+
601+
587602
categorize in aggs inside function
588603
required_capability: categorize_v5
589604

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/Bucket.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,10 @@
5858
* from a number of desired buckets (as a hint) and a range (auto mode).
5959
* In the former case, two parameters will be provided, in the latter four.
6060
*/
61-
public class Bucket extends GroupingFunction implements PostOptimizationVerificationAware, TwoOptionalArguments {
61+
public class Bucket extends GroupingFunction.EvaluatableGroupingFunction
62+
implements
63+
PostOptimizationVerificationAware,
64+
TwoOptionalArguments {
6265
public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(Expression.class, "Bucket", Bucket::new);
6366

6467
// TODO maybe we should just cover the whole of representable dates here - like ten years, 100 years, 1000 years, all the way up.

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/Categorize.java

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
1111
import org.elasticsearch.common.io.stream.StreamInput;
1212
import org.elasticsearch.common.io.stream.StreamOutput;
13-
import org.elasticsearch.compute.operator.EvalOperator.ExpressionEvaluator;
1413
import org.elasticsearch.xpack.esql.core.expression.Expression;
1514
import org.elasticsearch.xpack.esql.core.expression.Nullability;
1615
import org.elasticsearch.xpack.esql.core.tree.NodeInfo;
@@ -37,7 +36,7 @@
3736
* For the implementation, see {@link org.elasticsearch.compute.aggregation.blockhash.CategorizeBlockHash}
3837
* </p>
3938
*/
40-
public class Categorize extends GroupingFunction {
39+
public class Categorize extends GroupingFunction.NonEvaluatableGroupingFunction {
4140
public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(
4241
Expression.class,
4342
"Categorize",
@@ -53,8 +52,8 @@ public class Categorize extends GroupingFunction {
5352
`CATEGORIZE` has the following limitations:
5453
5554
* can’t be used within other expressions
56-
* can’t be used with multiple groupings
57-
* can’t be used or referenced within aggregate functions""",
55+
* can’t be used more than once in the groupings
56+
* can’t be used or referenced within aggregate functions and it has to be the first grouping""",
5857
examples = {
5958
@Example(
6059
file = "docs",
@@ -101,11 +100,6 @@ public Nullability nullable() {
101100
return Nullability.TRUE;
102101
}
103102

104-
@Override
105-
public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) {
106-
throw new UnsupportedOperationException("CATEGORIZE is only evaluated during aggregations");
107-
}
108-
109103
@Override
110104
protected TypeResolution resolveType() {
111105
return isString(field(), sourceText(), DEFAULT);

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/GroupingFunction.java

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,17 +22,13 @@
2222

2323
import static org.elasticsearch.xpack.esql.common.Failure.fail;
2424

25-
public abstract class GroupingFunction extends Function implements EvaluatorMapper, PostAnalysisPlanVerificationAware {
25+
public abstract sealed class GroupingFunction extends Function implements PostAnalysisPlanVerificationAware permits
26+
GroupingFunction.NonEvaluatableGroupingFunction, GroupingFunction.EvaluatableGroupingFunction {
2627

2728
protected GroupingFunction(Source source, List<Expression> fields) {
2829
super(source, fields);
2930
}
3031

31-
@Override
32-
public Object fold(FoldContext ctx) {
33-
return EvaluatorMapper.super.fold(source(), ctx);
34-
}
35-
3632
@Override
3733
public BiConsumer<LogicalPlan, Failures> postAnalysisPlanVerification() {
3834
return (p, failures) -> {
@@ -45,4 +41,20 @@ public BiConsumer<LogicalPlan, Failures> postAnalysisPlanVerification() {
4541
};
4642
}
4743

44+
public abstract static non-sealed class NonEvaluatableGroupingFunction extends GroupingFunction {
45+
protected NonEvaluatableGroupingFunction(Source source, List<Expression> fields) {
46+
super(source, fields);
47+
}
48+
}
49+
50+
public abstract static non-sealed class EvaluatableGroupingFunction extends GroupingFunction implements EvaluatorMapper {
51+
protected EvaluatableGroupingFunction(Source source, List<Expression> fields) {
52+
super(source, fields);
53+
}
54+
55+
@Override
56+
public Object fold(FoldContext ctx) {
57+
return EvaluatorMapper.super.fold(source(), ctx);
58+
}
59+
}
4860
}

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/CombineProjections.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -159,10 +159,10 @@ private static List<Expression> combineUpperGroupingsAndLowerProjections(
159159
// Propagate any renames from the lower projection into the upper groupings.
160160
// This can lead to duplicates: e.g.
161161
// | EVAL x = y | STATS ... BY x, y
162-
// All substitutions happen before; groupings must be attributes at this point except for CATEGORIZE which will be an alias like
163-
// `c = CATEGORIZE(attribute)`.
162+
// All substitutions happen before; groupings must be attributes at this point except for non-evaluatable groupings which will be
163+
// an alias like `c = CATEGORIZE(attribute)`.
164164
// Therefore, it is correct to deduplicate based on simple equality (based on names) instead of name ids (Set vs. AttributeSet).
165-
// TODO: The deduplication based on simple equality will be insufficient in case of multiple CATEGORIZEs, e.g. for
165+
// TODO: The deduplication based on simple equality will be insufficient in case of multiple non-evaluatable groupings, e.g. for
166166
// `| EVAL x = y | STATS ... BY CATEGORIZE(x), CATEGORIZE(y)`. That will require semantic equality instead.
167167
LinkedHashSet<NamedExpression> resolvedGroupings = new LinkedHashSet<>();
168168
for (NamedExpression ne : upperGroupings) {

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/FoldNull.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
import org.elasticsearch.xpack.esql.core.expression.Nullability;
1515
import org.elasticsearch.xpack.esql.expression.function.aggregate.AggregateFunction;
1616
import org.elasticsearch.xpack.esql.expression.function.grouping.Categorize;
17+
import org.elasticsearch.xpack.esql.expression.function.grouping.GroupingFunction;
1718
import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.In;
1819
import org.elasticsearch.xpack.esql.optimizer.LogicalOptimizerContext;
1920

@@ -43,9 +44,9 @@ public Expression rule(Expression e, LogicalOptimizerContext ctx) {
4344
return Literal.of(in, null);
4445
}
4546
} else if (e instanceof Alias == false && e.nullable() == Nullability.TRUE
46-
// Categorize function stays as a STATS grouping (It isn't moved to an early EVAL like other groupings),
47+
// Non-evaluatable functions stay as a STATS grouping (It isn't moved to an early EVAL like other groupings),
4748
// so folding it to null would currently break the plan, as we don't create an attribute/channel for that null value.
48-
&& e instanceof Categorize == false
49+
&& e instanceof GroupingFunction.NonEvaluatableGroupingFunction == false
4950
&& Expressions.anyMatch(e.children(), Expressions::isGuaranteedNull)) {
5051
return Literal.of(e, null);
5152
}

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceAggregateAggExpressionWithEval.java

Lines changed: 17 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
import org.elasticsearch.xpack.esql.core.tree.Source;
1717
import org.elasticsearch.xpack.esql.core.util.Holder;
1818
import org.elasticsearch.xpack.esql.expression.function.aggregate.AggregateFunction;
19-
import org.elasticsearch.xpack.esql.expression.function.grouping.Categorize;
19+
import org.elasticsearch.xpack.esql.expression.function.grouping.GroupingFunction;
2020
import org.elasticsearch.xpack.esql.plan.logical.Aggregate;
2121
import org.elasticsearch.xpack.esql.plan.logical.Eval;
2222
import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan;
@@ -50,20 +50,20 @@ public ReplaceAggregateAggExpressionWithEval() {
5050

5151
@Override
5252
protected LogicalPlan rule(Aggregate aggregate) {
53-
// build alias map
53+
// an alias map for evaluatable grouping functions
5454
AttributeMap.Builder<Expression> aliasesBuilder = AttributeMap.builder();
55-
aggregate.forEachExpressionUp(Alias.class, a -> aliasesBuilder.put(a.toAttribute(), a.child()));
56-
var aliases = aliasesBuilder.build();
57-
58-
// Build Categorize grouping functions map.
59-
// Functions like BUCKET() shouldn't reach this point,
60-
// as they are moved to an early EVAL by ReplaceAggregateNestedExpressionWithEval
61-
Map<Categorize, Attribute> groupingAttributes = new HashMap<>();
55+
// a function map for non-evaluatable grouping functions
56+
Map<GroupingFunction.NonEvaluatableGroupingFunction, Attribute> nonEvalGroupingAttributes = new HashMap<>(
57+
aggregate.groupings().size()
58+
);
6259
aggregate.forEachExpressionUp(Alias.class, a -> {
63-
if (a.child() instanceof Categorize groupingFunction) {
64-
groupingAttributes.put(groupingFunction, a.toAttribute());
60+
if (a.child() instanceof GroupingFunction.NonEvaluatableGroupingFunction groupingFunction) {
61+
nonEvalGroupingAttributes.put(groupingFunction, a.toAttribute());
62+
} else {
63+
aliasesBuilder.put(a.toAttribute(), a.child());
6564
}
6665
});
66+
var aliases = aliasesBuilder.build();
6767

6868
// break down each aggregate into AggregateFunction and/or grouping key
6969
// preserve the projection at the end
@@ -123,8 +123,11 @@ protected LogicalPlan rule(Aggregate aggregate) {
123123
return alias.toAttribute();
124124
});
125125

126-
// replace grouping functions with their references
127-
aggExpression = aggExpression.transformUp(Categorize.class, groupingAttributes::get);
126+
// replace non-evaluatable grouping functions with their references
127+
aggExpression = aggExpression.transformUp(
128+
GroupingFunction.NonEvaluatableGroupingFunction.class,
129+
nonEvalGroupingAttributes::get
130+
);
128131

129132
Alias alias = as.replaceChild(aggExpression);
130133
newEvals.add(alias);
@@ -152,7 +155,7 @@ protected LogicalPlan rule(Aggregate aggregate) {
152155
return plan;
153156
}
154157

155-
static String syntheticName(Expression expression, Expression af, int counter) {
158+
private static String syntheticName(Expression expression, Expression af, int counter) {
156159
return TemporaryNameUtils.temporaryName(expression, af, counter);
157160
}
158161
}

0 commit comments

Comments
 (0)