Skip to content
6 changes: 6 additions & 0 deletions docs/changelog/127524.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 127524
summary: Resolve groupings in aggregate before resolving references to groupings in
the aggregations
area: ES|QL
type: bug
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -850,3 +850,37 @@ c:long | b:date
11 | 1984-05-01T00:00:00.000Z
11 | 1991-01-01T00:00:00.000Z
;

resolveGroupingsBeforeResolvingImplicitReferencesToGroupings
required_capability: resolve_groupings_before_resolving_references_to_groupings_in_aggregations

FROM employees
| STATS c = count(emp_no), b = BUCKET(hire_date, "1 year") + 1 year BY yr = BUCKET(hire_date, "1 year")
| SORT yr
| LIMIT 5
;

c:long | b:datetime | yr:datetime
11 | 1986-01-01T00:00:00.000Z | 1985-01-01T00:00:00.000Z
11 | 1987-01-01T00:00:00.000Z | 1986-01-01T00:00:00.000Z
15 | 1988-01-01T00:00:00.000Z | 1987-01-01T00:00:00.000Z
9 | 1989-01-01T00:00:00.000Z | 1988-01-01T00:00:00.000Z
13 | 1990-01-01T00:00:00.000Z | 1989-01-01T00:00:00.000Z
;

resolveGroupingsBeforeResolvingExplicitReferencesToGroupings
required_capability: resolve_groupings_before_resolving_references_to_groupings_in_aggregations

FROM employees
| STATS c = count(emp_no), b = yr + 1 year BY yr = BUCKET(hire_date, "1 year")
| SORT yr
| LIMIT 5
;

c:long | b:datetime | yr:datetime
11 | 1986-01-01T00:00:00.000Z | 1985-01-01T00:00:00.000Z
11 | 1987-01-01T00:00:00.000Z | 1986-01-01T00:00:00.000Z
15 | 1988-01-01T00:00:00.000Z | 1987-01-01T00:00:00.000Z
9 | 1989-01-01T00:00:00.000Z | 1988-01-01T00:00:00.000Z
13 | 1990-01-01T00:00:00.000Z | 1989-01-01T00:00:00.000Z
;
Original file line number Diff line number Diff line change
Expand Up @@ -3097,3 +3097,27 @@ ROW a = [1,2,3], b = 5
STD_DEV(a):double | STD_DEV(b):double
0.816496580927726 | 0.0
;

resolveGroupingsBeforeResolvingImplicitReferencesToGroupings
required_capability: resolve_groupings_before_resolving_references_to_groupings_in_aggregations

FROM employees
| EVAL date = "2025-01-01"::datetime
| stats m = MAX(hire_date) BY d = (date == "2025-01-01")
;

m:datetime | d:boolean
1999-04-30T00:00:00.000Z | true
;

resolveGroupingsBeforeResolvingExplicitReferencesToGroupings
required_capability: resolve_groupings_before_resolving_references_to_groupings_in_aggregations

FROM employees
| EVAL date = "2025-01-01"::datetime
| stats m = MAX(hire_date), x = d::int + 1 BY d = (date == "2025-01-01")
;

m:datetime | x:integer | d:boolean
1999-04-30T00:00:00.000Z | 2 | true
;
Original file line number Diff line number Diff line change
Expand Up @@ -1066,7 +1066,10 @@ public enum Cap {
*/
FIRST_OVER_TIME(Build.current().isSnapshot()),

;
/**
* Resolve groupings before resolving references to groupings in the aggregations.
*/
RESOLVE_GROUPINGS_BEFORE_RESOLVING_REFERENCES_TO_GROUPINGS_IN_AGGREGATIONS;

private final boolean enabled;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -173,13 +173,8 @@ public class Analyzer extends ParameterizedRuleExecutor<LogicalPlan, AnalyzerCon
),
new Batch<>(
"Resolution",
/*
* ImplicitCasting must be before ResolveRefs. Because a reference is created for a Bucket in Aggregate's aggregates,
* resolving this reference before implicit casting may cause this reference to have customMessage=true, it prevents further
* attempts to resolve this reference.
*/
new ImplicitCasting(),
new ResolveRefs(),
new ImplicitCasting(),
new ResolveUnionTypes() // Must be after ResolveRefs, so union types can be found
),
new Batch<>("Finish Analysis", Limiter.ONCE, new AddImplicitLimit(), new AddImplicitForkLimit(), new UnionTypesCleanup())
Expand Down Expand Up @@ -574,7 +569,7 @@ private Aggregate resolveAggregate(Aggregate aggregate, List<Attribute> children
}
}

if (Resolvables.resolved(groupings) == false || (Resolvables.resolved(aggregates) == false)) {
if (Resolvables.resolved(groupings) == false || Resolvables.resolved(aggregates) == false) {
ArrayList<Attribute> resolved = new ArrayList<>();
for (Expression e : groupings) {
Attribute attr = Expressions.attribute(e);
Expand All @@ -585,17 +580,29 @@ private Aggregate resolveAggregate(Aggregate aggregate, List<Attribute> children
List<Attribute> resolvedList = NamedExpressions.mergeOutputAttributes(resolved, childrenOutput);

List<NamedExpression> newAggregates = new ArrayList<>();
for (NamedExpression ag : aggregate.aggregates()) {
var agg = (NamedExpression) ag.transformUp(UnresolvedAttribute.class, ua -> {
Expression ne = ua;
Attribute maybeResolved = maybeResolveAttribute(ua, resolvedList);
if (maybeResolved != null) {
changed.set(true);
ne = maybeResolved;
}
return ne;
});
newAggregates.add(agg);
// If the groupings are not resolved, skip the resolution of the references to groupings in the aggregates, resolve the
// aggregations that do not reference to groupings, so that the fields/attributes referenced by the aggregations can be
// resolved, and verifier doesn't report field/reference/column not found errors for them.
boolean groupingResolved = Resolvables.resolved(groupings);
int size = groupingResolved ? aggregates.size() : aggregates.size() - groupings.size();
for (int i = 0; i < aggregates.size(); i++) {
NamedExpression maybeResolvedAgg = aggregates.get(i);
if (i < size) { // Skip resolving references to groupings in the aggregations if the groupings are not resolved yet.
maybeResolvedAgg = (NamedExpression) maybeResolvedAgg.transformUp(UnresolvedAttribute.class, ua -> {
Expression ne = ua;
Attribute maybeResolved = maybeResolveAttribute(ua, resolvedList);
// An item in aggregations can reference to groupings explicitly, if groupings are not resolved yet and
// maybeResolved is not resolved, return the original UnresolvedAttribute, so that it has another chance
// to get resolved in the next iteration.
// For example STATS c = count(emp_no), x = d::int + 1 BY d = (date == "2025-01-01")
if (groupingResolved || maybeResolved.resolved()) {
changed.set(true);
ne = maybeResolved;
}
return ne;
});
}
newAggregates.add(maybeResolvedAgg);
}

// TODO: remove this when Stats interface is removed
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,11 @@
import org.elasticsearch.xpack.esql.expression.function.fulltext.MatchOperator;
import org.elasticsearch.xpack.esql.expression.function.fulltext.MultiMatch;
import org.elasticsearch.xpack.esql.expression.function.fulltext.QueryString;
import org.elasticsearch.xpack.esql.expression.function.grouping.Bucket;
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToInteger;
import org.elasticsearch.xpack.esql.expression.function.scalar.string.Concat;
import org.elasticsearch.xpack.esql.expression.function.scalar.string.Substring;
import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.Add;
import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.Equals;
import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.GreaterThan;
import org.elasticsearch.xpack.esql.index.EsIndex;
Expand Down Expand Up @@ -80,6 +83,7 @@
import org.elasticsearch.xpack.esql.session.IndexResolver;

import java.io.IOException;
import java.time.Period;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
Expand Down Expand Up @@ -110,6 +114,9 @@
import static org.elasticsearch.xpack.esql.analysis.AnalyzerTestUtils.randomValueOtherThanTest;
import static org.elasticsearch.xpack.esql.analysis.AnalyzerTestUtils.tsdbIndexResolution;
import static org.elasticsearch.xpack.esql.core.tree.Source.EMPTY;
import static org.elasticsearch.xpack.esql.core.type.DataType.DATETIME;
import static org.elasticsearch.xpack.esql.core.type.DataType.DATE_PERIOD;
import static org.elasticsearch.xpack.esql.type.EsqlDataTypeConverter.dateTimeToString;
import static org.hamcrest.Matchers.contains;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.empty;
Expand Down Expand Up @@ -368,7 +375,7 @@ public void testNoProjection() {
DataType.INTEGER,
DataType.KEYWORD,
DataType.TEXT,
DataType.DATETIME,
DATETIME,
DataType.TEXT,
DataType.KEYWORD,
DataType.INTEGER,
Expand Down Expand Up @@ -3789,6 +3796,147 @@ public void testResolveCompletionOutputField() {
assertThat(getAttributeByName(esRelation.output(), "description"), not(equalTo(completion.targetField())));
}

public void testResolveGroupingsBeforeResolvingImplicitReferencesToGroupings() {
var plan = analyze("""
FROM test
| EVAL date = "2025-01-01"::datetime
| STATS c = count(emp_no) BY d = (date == "2025-01-01")
""", "mapping-default.json");

var limit = as(plan, Limit.class);
var agg = as(limit.child(), Aggregate.class);
var aggregates = agg.aggregates();
assertThat(aggregates, hasSize(2));
Alias a = as(aggregates.get(0), Alias.class);
assertEquals("c", a.name());
Count c = as(a.child(), Count.class);
FieldAttribute fa = as(c.field(), FieldAttribute.class);
assertEquals("emp_no", fa.name());
ReferenceAttribute ra = as(aggregates.get(1), ReferenceAttribute.class); // reference in aggregates is resolved
assertEquals("d", ra.name());
List<Expression> groupings = agg.groupings();
assertEquals(1, groupings.size());
a = as(groupings.get(0), Alias.class); // reference in groupings is resolved
assertEquals("d", ra.name());
Equals equals = as(a.child(), Equals.class);
ra = as(equals.left(), ReferenceAttribute.class);
assertEquals("date", ra.name());
Literal literal = as(equals.right(), Literal.class);
assertEquals("2025-01-01T00:00:00.000Z", dateTimeToString(Long.parseLong(literal.value().toString())));
assertEquals(DATETIME, literal.dataType());
}

public void testResolveGroupingsBeforeResolvingExplicitReferencesToGroupings() {
var plan = analyze("""
FROM test
| EVAL date = "2025-01-01"::datetime
| STATS c = count(emp_no), x = d::int + 1 BY d = (date == "2025-01-01")
""", "mapping-default.json");

var limit = as(plan, Limit.class);
var agg = as(limit.child(), Aggregate.class);
var aggregates = agg.aggregates();
assertThat(aggregates, hasSize(3));
Alias a = as(aggregates.get(0), Alias.class);
assertEquals("c", a.name());
Count c = as(a.child(), Count.class);
FieldAttribute fa = as(c.field(), FieldAttribute.class);
assertEquals("emp_no", fa.name());
a = as(aggregates.get(1), Alias.class); // explicit reference to groupings is resolved
assertEquals("x", a.name());
Add add = as(a.child(), Add.class);
ToInteger toInteger = as(add.left(), ToInteger.class);
ReferenceAttribute ra = as(toInteger.field(), ReferenceAttribute.class);
assertEquals("d", ra.name());
ra = as(aggregates.get(2), ReferenceAttribute.class); // reference in aggregates is resolved
assertEquals("d", ra.name());
List<Expression> groupings = agg.groupings();
assertEquals(1, groupings.size());
a = as(groupings.get(0), Alias.class); // reference in groupings is resolved
assertEquals("d", ra.name());
Equals equals = as(a.child(), Equals.class);
ra = as(equals.left(), ReferenceAttribute.class);
assertEquals("date", ra.name());
Literal literal = as(equals.right(), Literal.class);
assertEquals("2025-01-01T00:00:00.000Z", dateTimeToString(Long.parseLong(literal.value().toString())));
assertEquals(DATETIME, literal.dataType());
}

public void testBucketWithIntervalInStringInBothAggregationAndGrouping() {
var plan = analyze("""
FROM test
| STATS c = count(emp_no), b = BUCKET(hire_date, "1 year") + 1 year BY yr = BUCKET(hire_date, "1 year")
""", "mapping-default.json");

var limit = as(plan, Limit.class);
var agg = as(limit.child(), Aggregate.class);
var aggregates = agg.aggregates();
assertThat(aggregates, hasSize(3));
Alias a = as(aggregates.get(0), Alias.class);
assertEquals("c", a.name());
Count c = as(a.child(), Count.class);
FieldAttribute fa = as(c.field(), FieldAttribute.class);
assertEquals("emp_no", fa.name());
a = as(aggregates.get(1), Alias.class); // explicit reference to groupings is resolved
assertEquals("b", a.name());
Add add = as(a.child(), Add.class);
Bucket bucket = as(add.left(), Bucket.class);
fa = as(bucket.field(), FieldAttribute.class);
assertEquals("hire_date", fa.name());
Literal literal = as(bucket.buckets(), Literal.class);
Literal oneYear = new Literal(EMPTY, Period.ofYears(1), DATE_PERIOD);
assertEquals(oneYear, literal);
literal = as(add.right(), Literal.class);
assertEquals(oneYear, literal);
ReferenceAttribute ra = as(aggregates.get(2), ReferenceAttribute.class); // reference in aggregates is resolved
assertEquals("yr", ra.name());
List<Expression> groupings = agg.groupings();
assertEquals(1, groupings.size());
a = as(groupings.get(0), Alias.class); // reference in groupings is resolved
assertEquals("yr", ra.name());
bucket = as(a.child(), Bucket.class);
fa = as(bucket.field(), FieldAttribute.class);
assertEquals("hire_date", fa.name());
literal = as(bucket.buckets(), Literal.class);
assertEquals(oneYear, literal);
}

public void testBucketWithIntervalInStringInGroupingReferencedInAggregation() {
var plan = analyze("""
FROM test
| STATS c = count(emp_no), b = yr + 1 year BY yr = BUCKET(hire_date, "1 year")
""", "mapping-default.json");

var limit = as(plan, Limit.class);
var agg = as(limit.child(), Aggregate.class);
var aggregates = agg.aggregates();
assertThat(aggregates, hasSize(3));
Alias a = as(aggregates.get(0), Alias.class);
assertEquals("c", a.name());
Count c = as(a.child(), Count.class);
FieldAttribute fa = as(c.field(), FieldAttribute.class);
assertEquals("emp_no", fa.name());
a = as(aggregates.get(1), Alias.class); // explicit reference to groupings is resolved
assertEquals("b", a.name());
Add add = as(a.child(), Add.class);
ReferenceAttribute ra = as(add.left(), ReferenceAttribute.class);
assertEquals("yr", ra.name());
Literal oneYear = new Literal(EMPTY, Period.ofYears(1), DATE_PERIOD);
Literal literal = as(add.right(), Literal.class);
assertEquals(oneYear, literal);
ra = as(aggregates.get(2), ReferenceAttribute.class); // reference in aggregates is resolved
assertEquals("yr", ra.name());
List<Expression> groupings = agg.groupings();
assertEquals(1, groupings.size());
a = as(groupings.get(0), Alias.class); // reference in groupings is resolved
assertEquals("yr", ra.name());
Bucket bucket = as(a.child(), Bucket.class);
fa = as(bucket.field(), FieldAttribute.class);
assertEquals("hire_date", fa.name());
literal = as(bucket.buckets(), Literal.class);
assertEquals(oneYear, literal);
}

@Override
protected IndexAnalyzers createDefaultIndexAnalyzers() {
return super.createDefaultIndexAnalyzers();
Expand Down
Loading