Skip to content
6 changes: 6 additions & 0 deletions docs/changelog/127524.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 127524
summary: Resolve groupings in aggregate before resolving references to groupings in
the aggregations
area: ES|QL
type: bug
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -173,13 +173,13 @@ public class Analyzer extends ParameterizedRuleExecutor<LogicalPlan, AnalyzerCon
),
new Batch<>(
"Resolution",
new ResolveRefs(),
/*
* ImplicitCasting must be before ResolveRefs. Because a reference is created for a Bucket in Aggregate's aggregates,
* resolving this reference before implicit casting may cause this reference to have customMessage=true, it prevents further
* attempts to resolve this reference.
*/
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. why does the order matter now vs before.
  2. the comment no longer hold true (looks like it was a harbinger of things to come).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The reason that ImplicitCasting was put before ResolveRefs is exactly because that resolveAggregate was attempting to resolve the references to the groupings, before the groupings are resolved, and it causes the references to the groupings marked unresolvable too early. This comment is outdated and removed. Thanks for the reminder!

new ImplicitCasting(),
new ResolveRefs(),
new ResolveUnionTypes() // Must be after ResolveRefs, so union types can be found
),
new Batch<>("Finish Analysis", Limiter.ONCE, new AddImplicitLimit(), new AddImplicitForkLimit(), new UnionTypesCleanup())
Expand Down Expand Up @@ -574,7 +574,7 @@ private Aggregate resolveAggregate(Aggregate aggregate, List<Attribute> children
}
}

if (Resolvables.resolved(groupings) == false || (Resolvables.resolved(aggregates) == false)) {
if (Resolvables.resolved(groupings) == false || Resolvables.resolved(aggregates) == false) {
ArrayList<Attribute> resolved = new ArrayList<>();
for (Expression e : groupings) {
Attribute attr = Expressions.attribute(e);
Expand All @@ -585,16 +585,28 @@ private Aggregate resolveAggregate(Aggregate aggregate, List<Attribute> children
List<Attribute> resolvedList = NamedExpressions.mergeOutputAttributes(resolved, childrenOutput);

List<NamedExpression> newAggregates = new ArrayList<>();
for (NamedExpression ag : aggregate.aggregates()) {
var agg = (NamedExpression) ag.transformUp(UnresolvedAttribute.class, ua -> {
Expression ne = ua;
Attribute maybeResolved = maybeResolveAttribute(ua, resolvedList);
if (maybeResolved != null) {
changed.set(true);
ne = maybeResolved;
}
return ne;
});
// If the groupings are not resolved, skip the resolution of the references to groupings in the aggregates, resolve the
// aggregations that do not reference to groupings, so that the fields/attributes referenced by the aggregations can be
// resolved, and verifier doesn't report field/reference/column not found errors for them.
boolean groupingResolved = Resolvables.resolved(groupings);
int size = groupingResolved ? aggregates.size() : aggregates.size() - groupings.size();
for (int i = 0; i < aggregates.size(); i++) {
NamedExpression agg = aggregates.get(i);
if (i < size) { // Skip resolving references to groupings in the aggregations if the groupings are not resolved yet.
agg = (NamedExpression) agg.transformUp(UnresolvedAttribute.class, ua -> {
Expression ne = ua;
Attribute maybeResolved = maybeResolveAttribute(ua, resolvedList);
// An item in aggregations can reference to groupings explicitly, if groupings are not resolved yet and
// maybeResolved is not resolved, return the original UnresolvedAttribute, so that it has another chance
// to get resolved in the next iteration.
// For example STATS c = count(emp_no), x = d::int + 1 BY d = (date == "2025-01-01")
if (groupingResolved || maybeResolved.resolved()) {
changed.set(true);
ne = maybeResolved;
}
return ne;
});
}
newAggregates.add(agg);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,11 @@
import org.elasticsearch.xpack.esql.expression.function.fulltext.MatchOperator;
import org.elasticsearch.xpack.esql.expression.function.fulltext.MultiMatch;
import org.elasticsearch.xpack.esql.expression.function.fulltext.QueryString;
import org.elasticsearch.xpack.esql.expression.function.grouping.Bucket;
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToInteger;
import org.elasticsearch.xpack.esql.expression.function.scalar.string.Concat;
import org.elasticsearch.xpack.esql.expression.function.scalar.string.Substring;
import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.Add;
import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.Equals;
import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.GreaterThan;
import org.elasticsearch.xpack.esql.index.EsIndex;
Expand Down Expand Up @@ -80,6 +83,7 @@
import org.elasticsearch.xpack.esql.session.IndexResolver;

import java.io.IOException;
import java.time.Period;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
Expand Down Expand Up @@ -110,6 +114,9 @@
import static org.elasticsearch.xpack.esql.analysis.AnalyzerTestUtils.randomValueOtherThanTest;
import static org.elasticsearch.xpack.esql.analysis.AnalyzerTestUtils.tsdbIndexResolution;
import static org.elasticsearch.xpack.esql.core.tree.Source.EMPTY;
import static org.elasticsearch.xpack.esql.core.type.DataType.DATETIME;
import static org.elasticsearch.xpack.esql.core.type.DataType.DATE_PERIOD;
import static org.elasticsearch.xpack.esql.type.EsqlDataTypeConverter.dateTimeToString;
import static org.hamcrest.Matchers.contains;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.empty;
Expand Down Expand Up @@ -368,7 +375,7 @@ public void testNoProjection() {
DataType.INTEGER,
DataType.KEYWORD,
DataType.TEXT,
DataType.DATETIME,
DATETIME,
DataType.TEXT,
DataType.KEYWORD,
DataType.INTEGER,
Expand Down Expand Up @@ -3789,6 +3796,147 @@ public void testResolveCompletionOutputField() {
assertThat(getAttributeByName(esRelation.output(), "description"), not(equalTo(completion.targetField())));
}

public void testResolveGroupingsBeforeResolvingImplicitReferencesToGroupings() {
var plan = analyze("""
FROM test
| EVAL date = "2025-01-01"::datetime
| STATS c = count(emp_no) BY d = (date == "2025-01-01")
""", "mapping-default.json");

var limit = as(plan, Limit.class);
var agg = as(limit.child(), Aggregate.class);
var aggregates = agg.aggregates();
assertThat(aggregates, hasSize(2));
Alias a = as(aggregates.get(0), Alias.class);
assertEquals("c", a.name());
Count c = as(a.child(), Count.class);
FieldAttribute fa = as(c.field(), FieldAttribute.class);
assertEquals("emp_no", fa.name());
ReferenceAttribute ra = as(aggregates.get(1), ReferenceAttribute.class); // reference in aggregates is resolved
assertEquals("d", ra.name());
List<Expression> groupings = agg.groupings();
assertEquals(1, groupings.size());
a = as(groupings.get(0), Alias.class); // reference in groupings is resolved
assertEquals("d", ra.name());
Equals equals = as(a.child(), Equals.class);
ra = as(equals.left(), ReferenceAttribute.class);
assertEquals("date", ra.name());
Literal literal = as(equals.right(), Literal.class);
assertEquals("2025-01-01T00:00:00.000Z", dateTimeToString(Long.parseLong(literal.value().toString())));
assertEquals(DATETIME, literal.dataType());
}

public void testResolveGroupingsBeforeResolvingExplicitReferencesToGroupings() {
var plan = analyze("""
FROM test
| EVAL date = "2025-01-01"::datetime
| STATS c = count(emp_no), x = d::int + 1 BY d = (date == "2025-01-01")
""", "mapping-default.json");

var limit = as(plan, Limit.class);
var agg = as(limit.child(), Aggregate.class);
var aggregates = agg.aggregates();
assertThat(aggregates, hasSize(3));
Alias a = as(aggregates.get(0), Alias.class);
assertEquals("c", a.name());
Count c = as(a.child(), Count.class);
FieldAttribute fa = as(c.field(), FieldAttribute.class);
assertEquals("emp_no", fa.name());
a = as(aggregates.get(1), Alias.class); // explicit reference to groupings is resolved
assertEquals("x", a.name());
Add add = as(a.child(), Add.class);
ToInteger toInteger = as(add.left(), ToInteger.class);
ReferenceAttribute ra = as(toInteger.field(), ReferenceAttribute.class);
assertEquals("d", ra.name());
ra = as(aggregates.get(2), ReferenceAttribute.class); // reference in aggregates is resolved
assertEquals("d", ra.name());
List<Expression> groupings = agg.groupings();
assertEquals(1, groupings.size());
a = as(groupings.get(0), Alias.class); // reference in groupings is resolved
assertEquals("d", ra.name());
Equals equals = as(a.child(), Equals.class);
ra = as(equals.left(), ReferenceAttribute.class);
assertEquals("date", ra.name());
Literal literal = as(equals.right(), Literal.class);
assertEquals("2025-01-01T00:00:00.000Z", dateTimeToString(Long.parseLong(literal.value().toString())));
assertEquals(DATETIME, literal.dataType());
}

public void testBucketWithIntervalInStringInBothAggregationAndGrouping() {
var plan = analyze("""
FROM test
| STATS c = count(emp_no), b = BUCKET(hire_date, "1 year") + 1 year BY yr = BUCKET(hire_date, "1 year")
""", "mapping-default.json");

var limit = as(plan, Limit.class);
var agg = as(limit.child(), Aggregate.class);
var aggregates = agg.aggregates();
assertThat(aggregates, hasSize(3));
Alias a = as(aggregates.get(0), Alias.class);
assertEquals("c", a.name());
Count c = as(a.child(), Count.class);
FieldAttribute fa = as(c.field(), FieldAttribute.class);
assertEquals("emp_no", fa.name());
a = as(aggregates.get(1), Alias.class); // explicit reference to groupings is resolved
assertEquals("b", a.name());
Add add = as(a.child(), Add.class);
Bucket bucket = as(add.left(), Bucket.class);
fa = as(bucket.field(), FieldAttribute.class);
assertEquals("hire_date", fa.name());
Literal literal = as(bucket.buckets(), Literal.class);
Literal oneYear = new Literal(EMPTY, Period.ofYears(1), DATE_PERIOD);
assertEquals(oneYear, literal);
literal = as(add.right(), Literal.class);
assertEquals(oneYear, literal);
ReferenceAttribute ra = as(aggregates.get(2), ReferenceAttribute.class); // reference in aggregates is resolved
assertEquals("yr", ra.name());
List<Expression> groupings = agg.groupings();
assertEquals(1, groupings.size());
a = as(groupings.get(0), Alias.class); // reference in groupings is resolved
assertEquals("yr", ra.name());
bucket = as(a.child(), Bucket.class);
fa = as(bucket.field(), FieldAttribute.class);
assertEquals("hire_date", fa.name());
literal = as(bucket.buckets(), Literal.class);
assertEquals(oneYear, literal);
}

public void testBucketWithIntervalInStringInGroupingReferencedInAggregation() {
var plan = analyze("""
FROM test
| STATS c = count(emp_no), b = yr + 1 year BY yr = BUCKET(hire_date, "1 year")
""", "mapping-default.json");

var limit = as(plan, Limit.class);
var agg = as(limit.child(), Aggregate.class);
var aggregates = agg.aggregates();
assertThat(aggregates, hasSize(3));
Alias a = as(aggregates.get(0), Alias.class);
assertEquals("c", a.name());
Count c = as(a.child(), Count.class);
FieldAttribute fa = as(c.field(), FieldAttribute.class);
assertEquals("emp_no", fa.name());
a = as(aggregates.get(1), Alias.class); // explicit reference to groupings is resolved
assertEquals("b", a.name());
Add add = as(a.child(), Add.class);
ReferenceAttribute ra = as(add.left(), ReferenceAttribute.class);
assertEquals("yr", ra.name());
Literal oneYear = new Literal(EMPTY, Period.ofYears(1), DATE_PERIOD);
Literal literal = as(add.right(), Literal.class);
assertEquals(oneYear, literal);
ra = as(aggregates.get(2), ReferenceAttribute.class); // reference in aggregates is resolved
assertEquals("yr", ra.name());
List<Expression> groupings = agg.groupings();
assertEquals(1, groupings.size());
a = as(groupings.get(0), Alias.class); // reference in groupings is resolved
assertEquals("yr", ra.name());
Bucket bucket = as(a.child(), Bucket.class);
fa = as(bucket.field(), FieldAttribute.class);
assertEquals("hire_date", fa.name());
literal = as(bucket.buckets(), Literal.class);
assertEquals(oneYear, literal);
}

@Override
protected IndexAnalyzers createDefaultIndexAnalyzers() {
return super.createDefaultIndexAnalyzers();
Expand Down
Loading