Skip to content

Commit f6024f7

Browse files
authored
Avoid dropping aggregate groupings in local plans (#129370) (#130060) (#130128)
The local plan optimizer should not change the layout, as it has already been agreed upon. However, CombineProjections can violate this when some grouping elements refer to the same attribute. This occurs when ReplaceFieldWithConstantOrNull replaces missing fields with the same reference for a given data type. Closes #128054 Closes #129811
1 parent 0a4c8d8 commit f6024f7

File tree

6 files changed

+172
-45
lines changed

6 files changed

+172
-45
lines changed

docs/changelog/129370.yaml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
pr: 129370
2+
summary: Avoid dropping aggregate groupings in local plans
3+
area: ES|QL
4+
type: bug
5+
issues:
6+
- 129811
7+
- 128054

x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/EsqlActionIT.java

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1454,6 +1454,39 @@ public void testQueryOnEmptyDataIndex() {
14541454
}
14551455
}
14561456

1457+
public void testGroupingStatsOnMissingFields() {
1458+
assertAcked(client().admin().indices().prepareCreate("missing_field_index").setMapping("data", "type=long"));
1459+
long oneValue = between(1, 1000);
1460+
indexDoc("missing_field_index", "1", "data", oneValue);
1461+
refresh("missing_field_index");
1462+
QueryPragmas pragmas = randomPragmas();
1463+
pragmas = new QueryPragmas(
1464+
Settings.builder().put(pragmas.getSettings()).put(QueryPragmas.MAX_CONCURRENT_SHARDS_PER_NODE.getKey(), 1).build()
1465+
);
1466+
EsqlQueryRequest request = new EsqlQueryRequest();
1467+
request.query("FROM missing_field_index,test | STATS s = sum(data) BY color, tag | SORT color");
1468+
request.pragmas(pragmas);
1469+
try (var r = run(request)) {
1470+
var rows = getValuesList(r);
1471+
assertThat(rows, hasSize(4));
1472+
for (List<Object> row : rows) {
1473+
assertThat(row, hasSize(3));
1474+
}
1475+
assertThat(rows.get(0).get(0), equalTo(20L));
1476+
assertThat(rows.get(0).get(1), equalTo("blue"));
1477+
assertNull(rows.get(0).get(2));
1478+
assertThat(rows.get(1).get(0), equalTo(10L));
1479+
assertThat(rows.get(1).get(1), equalTo("green"));
1480+
assertNull(rows.get(1).get(2));
1481+
assertThat(rows.get(2).get(0), equalTo(30L));
1482+
assertThat(rows.get(2).get(1), equalTo("red"));
1483+
assertNull(rows.get(2).get(2));
1484+
assertThat(rows.get(3).get(0), equalTo(oneValue));
1485+
assertNull(rows.get(3).get(1));
1486+
assertNull(rows.get(3).get(2));
1487+
}
1488+
}
1489+
14571490
private void assertEmptyIndexQueries(String from) {
14581491
try (EsqlQueryResponse resp = run(from + "METADATA _source | KEEP _source | LIMIT 1")) {
14591492
assertFalse(resp.values().hasNext());
@@ -1588,6 +1621,8 @@ private void createAndPopulateIndex(String indexName, Settings additionalSetting
15881621
"time",
15891622
"type=long",
15901623
"color",
1624+
"type=keyword",
1625+
"tag",
15911626
"type=keyword"
15921627
)
15931628
);

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizer.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,8 @@
2929
* This class is part of the planner. Data node level logical optimizations. At this point we have access to
3030
* {@link org.elasticsearch.xpack.esql.stats.SearchStats} which provides access to metadata about the index.
3131
*
32-
* <p>NB: This class also reapplies all the rules from {@link LogicalPlanOptimizer#operators()} and {@link LogicalPlanOptimizer#cleanup()}
32+
* <p>NB: This class also reapplies all the rules from {@link LogicalPlanOptimizer#operators(boolean)}
33+
* and {@link LogicalPlanOptimizer#cleanup()}
3334
*/
3435
public class LocalLogicalPlanOptimizer extends ParameterizedRuleExecutor<LogicalPlan, LocalLogicalOptimizerContext> {
3536

@@ -51,7 +52,7 @@ protected List<Batch<LogicalPlan>> batches() {
5152
var rules = new ArrayList<Batch<LogicalPlan>>();
5253
rules.add(local);
5354
// TODO: if the local rules haven't touched the tree, the rest of the rules can be skipped
54-
rules.addAll(asList(operators(), cleanup()));
55+
rules.addAll(asList(operators(true), cleanup()));
5556
return replaceRules(rules);
5657
}
5758

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -77,15 +77,15 @@
7777
* <li>The {@link LogicalPlanOptimizer#substitutions()} phase rewrites things to expand out shorthand in the syntax. For example,
7878
* a nested expression embedded in a stats gets replaced with an eval followed by a stats, followed by another eval. This phase
7979
* also applies surrogates, such as replacing an average with a sum divided by a count.</li>
80-
* <li>{@link LogicalPlanOptimizer#operators()} (NB: The word "operator" is extremely overloaded and referrers to many different
80+
* <li>{@link LogicalPlanOptimizer#operators(boolean)} (NB: The word "operator" is extremely overloaded and referrers to many different
8181
* things.) transform the tree in various different ways. This includes folding (i.e. computing constant expressions at parse
8282
* time), combining expressions, dropping redundant clauses, and some normalization such as putting literals on the right whenever
8383
* possible. These rules are run in a loop until none of the rules make any changes to the plan (there is also a safety shut off
8484
* after many iterations, although hitting that is considered a bug)</li>
8585
* <li>{@link LogicalPlanOptimizer#cleanup()} Which can replace sorts+limit with a TopN</li>
8686
* </ul>
8787
*
88-
* <p>Note that the {@link LogicalPlanOptimizer#operators()} and {@link LogicalPlanOptimizer#cleanup()} steps are reapplied at the
88+
* <p>Note that the {@link LogicalPlanOptimizer#operators(boolean)} and {@link LogicalPlanOptimizer#cleanup()} steps are reapplied at the
8989
* {@link LocalLogicalPlanOptimizer} layer.</p>
9090
*/
9191
public class LogicalPlanOptimizer extends ParameterizedRuleExecutor<LogicalPlan, LogicalOptimizerContext> {
@@ -117,7 +117,7 @@ protected static List<Batch<LogicalPlan>> rules() {
117117
var defaultTopN = new Batch<>("Add default TopN", new AddDefaultTopN());
118118
var label = new Batch<>("Set as Optimized", Limiter.ONCE, new SetAsOptimized());
119119

120-
return asList(substitutions(), operators(), skip, cleanup(), defaultTopN, label);
120+
return asList(substitutions(), operators(false), skip, cleanup(), defaultTopN, label);
121121
}
122122

123123
protected static Batch<LogicalPlan> substitutions() {
@@ -150,10 +150,10 @@ protected static Batch<LogicalPlan> substitutions() {
150150
);
151151
}
152152

153-
protected static Batch<LogicalPlan> operators() {
153+
protected static Batch<LogicalPlan> operators(boolean local) {
154154
return new Batch<>(
155155
"Operator Optimization",
156-
new CombineProjections(),
156+
new CombineProjections(local),
157157
new CombineEvals(),
158158
new PruneEmptyPlans(),
159159
new PropagateEmptyRelation(),

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/CombineProjections.java

Lines changed: 89 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -15,18 +15,26 @@
1515
import org.elasticsearch.xpack.esql.core.expression.Expression;
1616
import org.elasticsearch.xpack.esql.core.expression.Expressions;
1717
import org.elasticsearch.xpack.esql.core.expression.NamedExpression;
18+
import org.elasticsearch.xpack.esql.expression.function.grouping.Categorize;
1819
import org.elasticsearch.xpack.esql.plan.logical.Aggregate;
20+
import org.elasticsearch.xpack.esql.plan.logical.Eval;
1921
import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan;
2022
import org.elasticsearch.xpack.esql.plan.logical.Project;
2123
import org.elasticsearch.xpack.esql.plan.logical.UnaryPlan;
2224

2325
import java.util.ArrayList;
26+
import java.util.HashSet;
27+
import java.util.LinkedHashSet;
2428
import java.util.List;
29+
import java.util.Set;
2530

2631
public final class CombineProjections extends OptimizerRules.OptimizerRule<UnaryPlan> {
32+
// don't drop groupings from a local plan, as the layout has already been agreed upon
33+
private final boolean local;
2734

28-
public CombineProjections() {
35+
public CombineProjections(boolean local) {
2936
super(OptimizerRules.TransformDirection.UP);
37+
this.local = local;
3038
}
3139

3240
@Override
@@ -57,26 +65,89 @@ protected LogicalPlan rule(UnaryPlan plan) {
5765
return plan;
5866
}
5967

60-
// Agg with underlying Project (group by on sub-queries)
61-
if (plan instanceof Aggregate a) {
62-
if (child instanceof Project p) {
63-
var groupings = a.groupings();
64-
List<Attribute> groupingAttrs = new ArrayList<>(a.groupings().size());
65-
for (Expression grouping : groupings) {
66-
if (grouping instanceof Attribute attribute) {
67-
groupingAttrs.add(attribute);
68+
if (plan instanceof Aggregate a && child instanceof Project p) {
69+
var groupings = a.groupings();
70+
71+
// sanity checks
72+
for (Expression grouping : groupings) {
73+
if ((grouping instanceof Attribute || grouping instanceof Alias as && as.child() instanceof Categorize) == false) {
74+
// After applying ReplaceAggregateNestedExpressionWithEval,
75+
// evaluatable groupings can only contain attributes.
76+
throw new EsqlIllegalArgumentException("Expected an attribute or grouping function, got {}", grouping);
77+
}
78+
}
79+
assert groupings.size() <= 1
80+
|| groupings.stream().anyMatch(group -> group.anyMatch(expr -> expr instanceof Categorize)) == false
81+
: "CombineProjections only tested with a single CATEGORIZE with no additional groups";
82+
83+
// Collect the alias map for resolving the source (f1 = 1, f2 = f1, etc..)
84+
AttributeMap.Builder<Attribute> aliasesBuilder = AttributeMap.builder();
85+
for (NamedExpression ne : p.projections()) {
86+
// Record the aliases.
87+
// Projections are just aliases for attributes, so casting is safe.
88+
aliasesBuilder.put(ne.toAttribute(), (Attribute) Alias.unwrap(ne));
89+
}
90+
var aliases = aliasesBuilder.build();
91+
92+
// Propagate any renames from the lower projection into the upper groupings.
93+
List<Expression> resolvedGroupings = new ArrayList<>();
94+
for (Expression grouping : groupings) {
95+
Expression transformed = grouping.transformUp(Attribute.class, as -> aliases.resolve(as, as));
96+
resolvedGroupings.add(transformed);
97+
}
98+
99+
// This can lead to duplicates in the groupings: e.g.
100+
// | EVAL x = y | STATS ... BY x, y
101+
if (local) {
102+
// On the data node, the groupings must be preserved because they affect the physical output (see
103+
// AbstractPhysicalOperationProviders#intermediateAttributes).
104+
// In case that propagating the lower projection leads to duplicates in the resolved groupings, we'll leave an Eval in place
105+
// of the original projection to create new attributes for the duplicate groups.
106+
Set<Expression> seenResolvedGroupings = new HashSet<>(resolvedGroupings.size());
107+
List<Expression> newGroupings = new ArrayList<>();
108+
List<Alias> aliasesAgainstDuplication = new ArrayList<>();
109+
110+
for (int i = 0; i < groupings.size(); i++) {
111+
Expression resolvedGrouping = resolvedGroupings.get(i);
112+
if (seenResolvedGroupings.add(resolvedGrouping)) {
113+
newGroupings.add(resolvedGrouping);
68114
} else {
69-
// After applying ReplaceAggregateNestedExpressionWithEval, groupings can only contain attributes.
70-
throw new EsqlIllegalArgumentException("Expected an Attribute, got {}", grouping);
115+
// resolving the renames leads to a duplicate here - we need to alias the underlying attribute this refers to.
116+
// should really only be 1 attribute, anyway, but going via .references() includes the case of a
117+
// GroupingFunction.NonEvaluatableGroupingFunction.
118+
Attribute coreAttribute = resolvedGrouping.references().iterator().next();
119+
120+
Alias renameAgainstDuplication = new Alias(
121+
coreAttribute.source(),
122+
TemporaryNameUtils.locallyUniqueTemporaryName(coreAttribute.name(), "temp_name"),
123+
coreAttribute
124+
);
125+
aliasesAgainstDuplication.add(renameAgainstDuplication);
126+
127+
// propagate the new alias into the new grouping
128+
AttributeMap.Builder<Attribute> resolverBuilder = AttributeMap.builder();
129+
resolverBuilder.put(coreAttribute, renameAgainstDuplication.toAttribute());
130+
AttributeMap<Attribute> resolver = resolverBuilder.build();
131+
132+
newGroupings.add(resolvedGrouping.transformUp(Attribute.class, attr -> resolver.resolve(attr, attr)));
71133
}
72134
}
73-
plan = new Aggregate(
74-
a.source(),
75-
p.child(),
76-
a.aggregateType(),
77-
combineUpperGroupingsAndLowerProjections(groupingAttrs, p.projections()),
78-
combineProjections(a.aggregates(), p.projections())
79-
);
135+
136+
LogicalPlan newChild = aliasesAgainstDuplication.isEmpty()
137+
? p.child()
138+
: new Eval(p.source(), p.child(), aliasesAgainstDuplication);
139+
plan = a.with(newChild, newGroupings, combineProjections(a.aggregates(), p.projections()));
140+
} else {
141+
// On the coordinator, we can just discard the duplicates.
142+
// All substitutions happen before; groupings must be attributes at this point except for non-evaluatable groupings which
143+
// will be an alias like `c = CATEGORIZE(attribute)`.
144+
// Due to such aliases, we can't use an AttributeSet to deduplicate. But we can use a regular set to deduplicate based on
145+
// regular equality (i.e. based on names) instead of name ids.
146+
// TODO: The deduplication based on simple equality will be insufficient in case of multiple non-evaluatable groupings, e.g.
147+
// for `| EVAL x = y | STATS ... BY CATEGORIZE(x), CATEGORIZE(y)`. That will require semantic equality instead. Also
148+
// applies in the local case below.
149+
List<Expression> newGroupings = new ArrayList<>(new LinkedHashSet<>(resolvedGroupings));
150+
plan = a.with(p.child(), newGroupings, combineProjections(a.aggregates(), p.projections()));
80151
}
81152
}
82153

@@ -136,26 +207,6 @@ private static List<NamedExpression> combineProjections(List<? extends NamedExpr
136207
return replaced;
137208
}
138209

139-
private static List<Expression> combineUpperGroupingsAndLowerProjections(
140-
List<? extends Attribute> upperGroupings,
141-
List<? extends NamedExpression> lowerProjections
142-
) {
143-
// Collect the alias map for resolving the source (f1 = 1, f2 = f1, etc..)
144-
AttributeMap<Attribute> aliases = new AttributeMap<>();
145-
for (NamedExpression ne : lowerProjections) {
146-
// Projections are just aliases for attributes, so casting is safe.
147-
aliases.put(ne.toAttribute(), (Attribute) Alias.unwrap(ne));
148-
}
149-
150-
// Replace any matching attribute directly with the aliased attribute from the projection.
151-
AttributeSet replaced = new AttributeSet();
152-
for (Attribute attr : upperGroupings) {
153-
// All substitutions happen before; groupings must be attributes at this point.
154-
replaced.add(aliases.resolve(attr, attr));
155-
}
156-
return new ArrayList<>(replaced);
157-
}
158-
159210
/**
160211
* Replace grouping alias previously contained in the aggregations that might have been projected away.
161212
*/

x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizerTests.java

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import org.elasticsearch.xpack.esql.core.expression.Expressions;
2222
import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
2323
import org.elasticsearch.xpack.esql.core.expression.Literal;
24+
import org.elasticsearch.xpack.esql.core.expression.ReferenceAttribute;
2425
import org.elasticsearch.xpack.esql.core.expression.predicate.logical.And;
2526
import org.elasticsearch.xpack.esql.core.expression.predicate.nulls.IsNotNull;
2627
import org.elasticsearch.xpack.esql.core.tree.NodeInfo;
@@ -36,6 +37,7 @@
3637
import org.elasticsearch.xpack.esql.index.IndexResolution;
3738
import org.elasticsearch.xpack.esql.optimizer.rules.logical.local.InferIsNotNull;
3839
import org.elasticsearch.xpack.esql.parser.EsqlParser;
40+
import org.elasticsearch.xpack.esql.plan.logical.Aggregate;
3941
import org.elasticsearch.xpack.esql.plan.logical.EsRelation;
4042
import org.elasticsearch.xpack.esql.plan.logical.Eval;
4143
import org.elasticsearch.xpack.esql.plan.logical.Filter;
@@ -73,6 +75,7 @@
7375
import static org.elasticsearch.xpack.esql.EsqlTestUtils.withDefaultLimitWarning;
7476
import static org.elasticsearch.xpack.esql.core.tree.Source.EMPTY;
7577
import static org.hamcrest.Matchers.contains;
78+
import static org.hamcrest.Matchers.equalTo;
7679
import static org.hamcrest.Matchers.hasSize;
7780
import static org.hamcrest.Matchers.is;
7881
import static org.hamcrest.Matchers.not;
@@ -587,6 +590,36 @@ public void testIsNotNullOnCase_With_IS_NULL() {
587590
var source = as(filter.child(), EsRelation.class);
588591
}
589592

593+
/**
594+
* \_Aggregate[[first_name{r}#7, $$first_name$temp_name$17{r}#18],[SUM(salary{f}#11,true[BOOLEAN]) AS SUM(salary)#5, first_nam
595+
* e{r}#7, first_name{r}#7 AS last_name#10]]
596+
* \_Eval[[null[KEYWORD] AS first_name#7, null[KEYWORD] AS $$first_name$temp_name$17#18]]
597+
* \_EsRelation[test][_meta_field{f}#12, emp_no{f}#6, first_name{f}#7, ge..]
598+
*/
599+
public void testGroupingByMissingFields() {
600+
var plan = plan("FROM test | STATS SUM(salary) BY first_name, last_name");
601+
var testStats = statsForMissingField("first_name", "last_name");
602+
var localPlan = localPlan(plan, testStats);
603+
Limit limit = as(localPlan, Limit.class);
604+
Aggregate aggregate = as(limit.child(), Aggregate.class);
605+
assertThat(aggregate.groupings(), hasSize(2));
606+
ReferenceAttribute grouping1 = as(aggregate.groupings().get(0), ReferenceAttribute.class);
607+
ReferenceAttribute grouping2 = as(aggregate.groupings().get(1), ReferenceAttribute.class);
608+
Eval eval = as(aggregate.child(), Eval.class);
609+
assertThat(eval.fields(), hasSize(2));
610+
Alias eval1 = eval.fields().get(0);
611+
Literal literal1 = as(eval1.child(), Literal.class);
612+
assertNull(literal1.value());
613+
assertThat(literal1.dataType(), is(DataType.KEYWORD));
614+
Alias eval2 = eval.fields().get(1);
615+
Literal literal2 = as(eval2.child(), Literal.class);
616+
assertNull(literal2.value());
617+
assertThat(literal2.dataType(), is(DataType.KEYWORD));
618+
assertThat(grouping1.id(), equalTo(eval1.id()));
619+
assertThat(grouping2.id(), equalTo(eval2.id()));
620+
as(eval.child(), EsRelation.class);
621+
}
622+
590623
private IsNotNull isNotNull(Expression field) {
591624
return new IsNotNull(EMPTY, field);
592625
}

0 commit comments

Comments
 (0)