Skip to content

Commit fa541d2

Browse files
authored
[8.x] ESQL: optimise aggregations filtered by false/null into evals (#115858) (#116713)
* ESQL: optimise aggregations filtered by false/null into evals (#115858) This adds a new optimiser rule to extract aggregate functions filtered by a `FALSE` or `NULL` into evals. The value taken by the evaluation is `0L`, for `COUNT()` and `COUNT_DISTINCT()`, `NULL` otherwise. Example: ``` ... | STATS x = someAgg(y) WHERE FALSE {BY z} | ... => ... | STATS x = someAgg(y) {BY z} > | EVAL x = NULL | KEEP x{, z} | ... ``` Related: #114352. * swap out list's getFirst/Last
1 parent 08f8312 commit fa541d2

File tree

7 files changed

+505
-7
lines changed

7 files changed

+505
-7
lines changed

docs/changelog/115858.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 115858
2+
summary: "ESQL: optimise aggregations filtered by false/null into evals"
3+
area: ES|QL
4+
type: enhancement
5+
issues: []

x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2382,6 +2382,116 @@ max:integer |max_a:integer|min:integer | min_a:integer
23822382
74999 |null |25324 | null
23832383
;
23842384

2385+
statsWithAllFiltersFalse
2386+
required_capability: per_agg_filtering
2387+
from employees
2388+
| stats max = max(height.float) where false,
2389+
min = min(height.float) where to_string(null) == "abc",
2390+
count = count(height.float) where false,
2391+
count_distinct = count_distinct(salary) where to_string(null) == "def"
2392+
;
2393+
2394+
max:double |min:double |count:long |count_distinct:long
2395+
null |null |0 |0
2396+
;
2397+
2398+
statsWithExpressionsAllFiltersFalse
2399+
required_capability: per_agg_filtering
2400+
from employees
2401+
| stats max = max(height.float + 1) where null,
2402+
count = count(height.float) + 2 where false,
2403+
mix = min(height.float + 1) + count_distinct(emp_no) + 2 where length(null) == 3
2404+
;
2405+
2406+
max:double |count:long |mix:double
2407+
null |2 |null
2408+
;
2409+
2410+
statsWithFalseFilterAndGroup
2411+
required_capability: per_agg_filtering
2412+
from employees
2413+
| stats max = max(height.float + 1) where null,
2414+
count = count(height.float) + 2 where false
2415+
by job_positions
2416+
| sort job_positions
2417+
| limit 4
2418+
;
2419+
2420+
max:double |count:long |job_positions:keyword
2421+
null |2 |Accountant
2422+
null |2 |Architect
2423+
null |2 |Business Analyst
2424+
null |2 |Data Scientist
2425+
;
2426+
2427+
statsWithFalseFiltersAndGroups
2428+
required_capability: per_agg_filtering
2429+
from employees
2430+
| eval my_length = length(concat(first_name, null))
2431+
| stats count_distinct = count_distinct(height.float + 1) where null,
2432+
count = count(height.float) + 2 where false,
2433+
values = values(first_name) where my_length > 3
2434+
by job_positions, is_rehired
2435+
| sort job_positions, is_rehired
2436+
| limit 10
2437+
;
2438+
2439+
count_distinct:long |count:long |values:keyword |job_positions:keyword |is_rehired:boolean
2440+
0 |2 |null |Accountant |false
2441+
0 |2 |null |Accountant |true
2442+
0 |2 |null |Accountant |null
2443+
0 |2 |null |Architect |false
2444+
0 |2 |null |Architect |true
2445+
0 |2 |null |Architect |null
2446+
0 |2 |null |Business Analyst |false
2447+
0 |2 |null |Business Analyst |true
2448+
0 |2 |null |Business Analyst |null
2449+
0 |2 |null |Data Scientist |false
2450+
;
2451+
2452+
statsWithMixedFiltersAndGroup
2453+
required_capability: per_agg_filtering
2454+
from employees
2455+
| eval my_length = length(concat(first_name, null))
2456+
| stats count = count(my_length) where false,
2457+
values = mv_slice(mv_sort(values(first_name)), 0, 1)
2458+
by job_positions
2459+
| sort job_positions
2460+
| limit 4
2461+
;
2462+
2463+
count:long |values:keyword |job_positions:keyword
2464+
0 |[Arumugam, Bojan] |Accountant
2465+
0 |[Alejandro, Charlene] |Architect
2466+
0 |[Basil, Breannda] |Business Analyst
2467+
0 |[Berni, Breannda] |Data Scientist
2468+
;
2469+
2470+
prunedStatsFollowedByStats
2471+
from employees
2472+
| eval my_length = length(concat(first_name, null))
2473+
| stats count = count(my_length) where false,
2474+
values = mv_slice(values(first_name), 0, 1) where my_length > 0
2475+
| stats count_distinct = count_distinct(count)
2476+
;
2477+
2478+
count_distinct:long
2479+
1
2480+
;
2481+
2482+
statsWithFalseFiltersFromRow
2483+
required_capability: per_agg_filtering
2484+
row x = null, a = 1, b = [2,3,4]
2485+
| stats c=max(a) where x
2486+
by b
2487+
;
2488+
2489+
c:integer |b:integer
2490+
null |2
2491+
null |3
2492+
null |4
2493+
;
2494+
23852495
statsWithBasicExpressionFiltered
23862496
required_capability: per_agg_filtering
23872497
from employees

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizer.java

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,15 @@
88
package org.elasticsearch.xpack.esql.optimizer;
99

1010
import org.elasticsearch.xpack.esql.optimizer.rules.logical.PropagateEmptyRelation;
11+
import org.elasticsearch.xpack.esql.optimizer.rules.logical.ReplaceStatsFilteredAggWithEval;
1112
import org.elasticsearch.xpack.esql.optimizer.rules.logical.local.InferIsNotNull;
1213
import org.elasticsearch.xpack.esql.optimizer.rules.logical.local.InferNonNullAggConstraint;
1314
import org.elasticsearch.xpack.esql.optimizer.rules.logical.local.LocalPropagateEmptyRelation;
1415
import org.elasticsearch.xpack.esql.optimizer.rules.logical.local.ReplaceMissingFieldWithNull;
1516
import org.elasticsearch.xpack.esql.optimizer.rules.logical.local.ReplaceTopNWithLimitAndSort;
1617
import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan;
1718
import org.elasticsearch.xpack.esql.rule.ParameterizedRuleExecutor;
19+
import org.elasticsearch.xpack.esql.rule.Rule;
1820

1921
import java.util.ArrayList;
2022
import java.util.List;
@@ -50,20 +52,31 @@ protected List<Batch<LogicalPlan>> batches() {
5052
rules.add(local);
5153
// TODO: if the local rules haven't touched the tree, the rest of the rules can be skipped
5254
rules.addAll(asList(operators(), cleanup()));
53-
replaceRules(rules);
54-
return rules;
55+
return replaceRules(rules);
5556
}
5657

58+
@SuppressWarnings("unchecked")
5759
private List<Batch<LogicalPlan>> replaceRules(List<Batch<LogicalPlan>> listOfRules) {
58-
for (Batch<LogicalPlan> batch : listOfRules) {
60+
List<Batch<LogicalPlan>> newBatches = new ArrayList<>(listOfRules.size());
61+
for (var batch : listOfRules) {
5962
var rules = batch.rules();
60-
for (int i = 0; i < rules.length; i++) {
61-
if (rules[i] instanceof PropagateEmptyRelation) {
62-
rules[i] = new LocalPropagateEmptyRelation();
63+
List<Rule<?, LogicalPlan>> newRules = new ArrayList<>(rules.length);
64+
boolean updated = false;
65+
for (var r : rules) {
66+
if (r instanceof PropagateEmptyRelation) {
67+
newRules.add(new LocalPropagateEmptyRelation());
68+
updated = true;
69+
} else if (r instanceof ReplaceStatsFilteredAggWithEval) {
70+
// skip it: once a fragment contains an Agg, this can no longer be pruned, which the rule can do
71+
updated = true;
72+
} else {
73+
newRules.add(r);
6374
}
6475
}
76+
batch = updated ? batch.with(newRules.toArray(Rule[]::new)) : batch;
77+
newBatches.add(batch);
6578
}
66-
return listOfRules;
79+
return newBatches;
6780
}
6881

6982
public LogicalPlan localOptimize(LogicalPlan plan) {

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
import org.elasticsearch.xpack.esql.optimizer.rules.logical.ReplaceLimitAndSortAsTopN;
4747
import org.elasticsearch.xpack.esql.optimizer.rules.logical.ReplaceOrderByExpressionWithEval;
4848
import org.elasticsearch.xpack.esql.optimizer.rules.logical.ReplaceRegexMatch;
49+
import org.elasticsearch.xpack.esql.optimizer.rules.logical.ReplaceStatsFilteredAggWithEval;
4950
import org.elasticsearch.xpack.esql.optimizer.rules.logical.ReplaceTrivialTypeConversions;
5051
import org.elasticsearch.xpack.esql.optimizer.rules.logical.SetAsOptimized;
5152
import org.elasticsearch.xpack.esql.optimizer.rules.logical.SimplifyComparisonsArithmetics;
@@ -170,6 +171,7 @@ protected static Batch<LogicalPlan> operators() {
170171
new CombineBinaryComparisons(),
171172
new CombineDisjunctions(),
172173
new SimplifyComparisonsArithmetics(DataType::areCompatible),
174+
new ReplaceStatsFilteredAggWithEval(),
173175
// prune/elimination
174176
new PruneFilters(),
175177
new PruneColumns(),
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0; you may not use this file except in compliance with the Elastic License
5+
* 2.0.
6+
*/
7+
8+
package org.elasticsearch.xpack.esql.optimizer.rules.logical;
9+
10+
import org.elasticsearch.compute.data.Block;
11+
import org.elasticsearch.compute.data.BlockUtils;
12+
import org.elasticsearch.xpack.esql.core.expression.Alias;
13+
import org.elasticsearch.xpack.esql.core.expression.Attribute;
14+
import org.elasticsearch.xpack.esql.core.expression.Literal;
15+
import org.elasticsearch.xpack.esql.core.expression.NamedExpression;
16+
import org.elasticsearch.xpack.esql.core.tree.Source;
17+
import org.elasticsearch.xpack.esql.expression.function.aggregate.AggregateFunction;
18+
import org.elasticsearch.xpack.esql.expression.function.aggregate.Count;
19+
import org.elasticsearch.xpack.esql.expression.function.aggregate.CountDistinct;
20+
import org.elasticsearch.xpack.esql.plan.logical.Aggregate;
21+
import org.elasticsearch.xpack.esql.plan.logical.Eval;
22+
import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan;
23+
import org.elasticsearch.xpack.esql.plan.logical.Project;
24+
import org.elasticsearch.xpack.esql.plan.logical.local.LocalRelation;
25+
import org.elasticsearch.xpack.esql.plan.logical.local.LocalSupplier;
26+
import org.elasticsearch.xpack.esql.planner.PlannerUtils;
27+
28+
import java.util.ArrayList;
29+
import java.util.List;
30+
31+
/**
32+
* Replaces an aggregation function having a false/null filter with an EVAL node.
33+
* <pre>
34+
* ... | STATS x = someAgg(y) WHERE FALSE {BY z} | ...
35+
* =>
36+
* ... | STATS x = someAgg(y) {BY z} > | EVAL x = NULL | KEEP x{, z} | ...
37+
* </pre>
38+
*/
39+
public class ReplaceStatsFilteredAggWithEval extends OptimizerRules.OptimizerRule<Aggregate> {
40+
@Override
41+
protected LogicalPlan rule(Aggregate aggregate) {
42+
int oldAggSize = aggregate.aggregates().size();
43+
List<NamedExpression> newAggs = new ArrayList<>(oldAggSize);
44+
List<Alias> newEvals = new ArrayList<>(oldAggSize);
45+
List<NamedExpression> newProjections = new ArrayList<>(oldAggSize);
46+
47+
for (var ne : aggregate.aggregates()) {
48+
if (ne instanceof Alias alias
49+
&& alias.child() instanceof AggregateFunction aggFunction
50+
&& aggFunction.hasFilter()
51+
&& aggFunction.filter() instanceof Literal literal
52+
&& Boolean.FALSE.equals(literal.fold())) {
53+
54+
Object value = aggFunction instanceof Count || aggFunction instanceof CountDistinct ? 0L : null;
55+
Alias newAlias = alias.replaceChild(Literal.of(aggFunction, value));
56+
newEvals.add(newAlias);
57+
newProjections.add(newAlias.toAttribute());
58+
} else {
59+
newAggs.add(ne); // agg function unchanged or grouping key
60+
newProjections.add(ne.toAttribute());
61+
}
62+
}
63+
64+
LogicalPlan plan = aggregate;
65+
if (newEvals.isEmpty() == false) {
66+
if (newAggs.isEmpty()) { // the Aggregate node is pruned
67+
plan = localRelation(aggregate.source(), newEvals);
68+
} else {
69+
plan = aggregate.with(aggregate.child(), aggregate.groupings(), newAggs);
70+
plan = new Eval(aggregate.source(), plan, newEvals);
71+
plan = new Project(aggregate.source(), plan, newProjections);
72+
}
73+
}
74+
return plan;
75+
}
76+
77+
private static LocalRelation localRelation(Source source, List<Alias> newEvals) {
78+
Block[] blocks = new Block[newEvals.size()];
79+
List<Attribute> attributes = new ArrayList<>(newEvals.size());
80+
for (int i = 0; i < newEvals.size(); i++) {
81+
Alias alias = newEvals.get(i);
82+
attributes.add(alias.toAttribute());
83+
blocks[i] = BlockUtils.constantBlock(PlannerUtils.NON_BREAKING_BLOCK_FACTORY, ((Literal) alias.child()).value(), 1);
84+
}
85+
return new LocalRelation(source, attributes, LocalSupplier.of(blocks));
86+
87+
}
88+
}

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/rule/RuleExecutor.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,10 @@ public String name() {
6868
return name;
6969
}
7070

71+
public Batch<TreeType> with(Rule<?, TreeType>[] rules) {
72+
return new Batch<>(name, limit, rules);
73+
}
74+
7175
public Rule<?, TreeType>[] rules() {
7276
return rules;
7377
}

0 commit comments

Comments
 (0)