Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/changelog/127524.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 127524
summary: Resolve groupings in aggregate before resolving references to groupings in
the aggregations
area: ES|QL
type: bug
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -815,3 +815,37 @@ c:long |b:date
0 |null
1 |1965-01-01T00:00:00.000Z
;

resolveGroupingsBeforeResolvingImplicitReferencesToGroupings
required_capability: resolve_groupings_before_resolving_references_to_groupings_in_aggregations

FROM employees
| STATS c = count(emp_no), b = BUCKET(hire_date, "1 year") + 1 year BY yr = BUCKET(hire_date, "1 year")
| SORT yr
| LIMIT 5
;

c:long | b:datetime | yr:datetime
11 | 1986-01-01T00:00:00.000Z | 1985-01-01T00:00:00.000Z
11 | 1987-01-01T00:00:00.000Z | 1986-01-01T00:00:00.000Z
15 | 1988-01-01T00:00:00.000Z | 1987-01-01T00:00:00.000Z
9 | 1989-01-01T00:00:00.000Z | 1988-01-01T00:00:00.000Z
13 | 1990-01-01T00:00:00.000Z | 1989-01-01T00:00:00.000Z
;

resolveGroupingsBeforeResolvingExplicitReferencesToGroupings
required_capability: resolve_groupings_before_resolving_references_to_groupings_in_aggregations

FROM employees
| STATS c = count(emp_no), b = yr + 1 year BY yr = BUCKET(hire_date, "1 year")
| SORT yr
| LIMIT 5
;

c:long | b:datetime | yr:datetime
11 | 1986-01-01T00:00:00.000Z | 1985-01-01T00:00:00.000Z
11 | 1987-01-01T00:00:00.000Z | 1986-01-01T00:00:00.000Z
15 | 1988-01-01T00:00:00.000Z | 1987-01-01T00:00:00.000Z
9 | 1989-01-01T00:00:00.000Z | 1988-01-01T00:00:00.000Z
13 | 1990-01-01T00:00:00.000Z | 1989-01-01T00:00:00.000Z
;
Original file line number Diff line number Diff line change
Expand Up @@ -3097,3 +3097,27 @@ ROW a = [1,2,3], b = 5
STD_DEV(a):double | STD_DEV(b):double
0.816496580927726 | 0.0
;

resolveGroupingsBeforeResolvingImplicitReferencesToGroupings
required_capability: resolve_groupings_before_resolving_references_to_groupings_in_aggregations

FROM employees
| EVAL date = "2025-01-01"::datetime
| stats m = MAX(hire_date) BY d = (date == "2025-01-01")
;

m:datetime | d:boolean
1999-04-30T00:00:00.000Z | true
;

resolveGroupingsBeforeResolvingExplicitReferencesToGroupings
required_capability: resolve_groupings_before_resolving_references_to_groupings_in_aggregations

FROM employees
| EVAL date = "2025-01-01"::datetime
| stats m = MAX(hire_date), x = d::int + 1 BY d = (date == "2025-01-01")
;

m:datetime | x:integer | d:boolean
1999-04-30T00:00:00.000Z | 2 | true
;
Original file line number Diff line number Diff line change
Expand Up @@ -838,7 +838,12 @@ public enum Cap {
* During resolution (pre-analysis) we have to consider that joins or enriches can override EVALuated values
* https://github.com/elastic/elasticsearch/issues/126419
*/
FIX_JOIN_MASKING_EVAL;
FIX_JOIN_MASKING_EVAL,

/**
* Resolve groupings before resolving references to groupings in the aggregations.
*/
RESOLVE_GROUPINGS_BEFORE_RESOLVING_REFERENCES_TO_GROUPINGS_IN_AGGREGATIONS;

private final boolean enabled;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -161,13 +161,8 @@ public class Analyzer extends ParameterizedRuleExecutor<LogicalPlan, AnalyzerCon
);
var resolution = new Batch<>(
"Resolution",
/*
* ImplicitCasting must be before ResolveRefs. Because a reference is created for a Bucket in Aggregate's aggregates,
* resolving this reference before implicit casting may cause this reference to have customMessage=true, it prevents further
* attempts to resolve this reference.
*/
new ImplicitCasting(),
new ResolveRefs(),
new ImplicitCasting(),
new ResolveUnionTypes() // Must be after ResolveRefs, so union types can be found
);
var finish = new Batch<>("Finish Analysis", Limiter.ONCE, new AddImplicitLimit(), new UnionTypesCleanup());
Expand Down Expand Up @@ -511,7 +506,7 @@ private Aggregate resolveAggregate(Aggregate aggregate, List<Attribute> children
}
}

if (Resolvables.resolved(groupings) == false || (Resolvables.resolved(aggregates) == false)) {
if (Resolvables.resolved(groupings) == false || Resolvables.resolved(aggregates) == false) {
ArrayList<Attribute> resolved = new ArrayList<>();
for (Expression e : groupings) {
Attribute attr = Expressions.attribute(e);
Expand All @@ -522,17 +517,29 @@ private Aggregate resolveAggregate(Aggregate aggregate, List<Attribute> children
List<Attribute> resolvedList = NamedExpressions.mergeOutputAttributes(resolved, childrenOutput);

List<NamedExpression> newAggregates = new ArrayList<>();
for (NamedExpression ag : aggregate.aggregates()) {
var agg = (NamedExpression) ag.transformUp(UnresolvedAttribute.class, ua -> {
Expression ne = ua;
Attribute maybeResolved = maybeResolveAttribute(ua, resolvedList);
if (maybeResolved != null) {
changed.set(true);
ne = maybeResolved;
}
return ne;
});
newAggregates.add(agg);
// If the groupings are not resolved, skip the resolution of the references to groupings in the aggregates, resolve the
// aggregations that do not reference to groupings, so that the fields/attributes referenced by the aggregations can be
// resolved, and verifier doesn't report field/reference/column not found errors for them.
boolean groupingResolved = Resolvables.resolved(groupings);
int size = groupingResolved ? aggregates.size() : aggregates.size() - groupings.size();
for (int i = 0; i < aggregates.size(); i++) {
NamedExpression maybeResolvedAgg = aggregates.get(i);
if (i < size) { // Skip resolving references to groupings in the aggregations if the groupings are not resolved yet.
maybeResolvedAgg = (NamedExpression) maybeResolvedAgg.transformUp(UnresolvedAttribute.class, ua -> {
Expression ne = ua;
Attribute maybeResolved = maybeResolveAttribute(ua, resolvedList);
// An item in aggregations can reference to groupings explicitly, if groupings are not resolved yet and
// maybeResolved is not resolved, return the original UnresolvedAttribute, so that it has another chance
// to get resolved in the next iteration.
// For example STATS c = count(emp_no), x = d::int + 1 BY d = (date == "2025-01-01")
if (groupingResolved || maybeResolved.resolved()) {
changed.set(true);
ne = maybeResolved;
}
return ne;
});
}
newAggregates.add(maybeResolvedAgg);
}

// TODO: remove this when Stats interface is removed
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import org.elasticsearch.xpack.esql.core.expression.Attribute;
import org.elasticsearch.xpack.esql.core.expression.AttributeSet;
import org.elasticsearch.xpack.esql.core.expression.EntryExpression;
import org.elasticsearch.xpack.esql.core.expression.Expression;
import org.elasticsearch.xpack.esql.core.expression.Expressions;
import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
import org.elasticsearch.xpack.esql.core.expression.Literal;
Expand All @@ -44,6 +45,10 @@
import org.elasticsearch.xpack.esql.expression.function.fulltext.Match;
import org.elasticsearch.xpack.esql.expression.function.fulltext.MatchOperator;
import org.elasticsearch.xpack.esql.expression.function.fulltext.QueryString;
import org.elasticsearch.xpack.esql.expression.function.grouping.Bucket;
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToInteger;
import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.Add;
import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.Equals;
import org.elasticsearch.xpack.esql.index.EsIndex;
import org.elasticsearch.xpack.esql.index.IndexResolution;
import org.elasticsearch.xpack.esql.parser.ParsingException;
Expand All @@ -66,6 +71,7 @@

import java.io.IOException;
import java.io.InputStream;
import java.time.Period;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
Expand All @@ -92,6 +98,9 @@
import static org.elasticsearch.xpack.esql.analysis.AnalyzerTestUtils.loadMapping;
import static org.elasticsearch.xpack.esql.analysis.AnalyzerTestUtils.tsdbIndexResolution;
import static org.elasticsearch.xpack.esql.core.tree.Source.EMPTY;
import static org.elasticsearch.xpack.esql.core.type.DataType.DATETIME;
import static org.elasticsearch.xpack.esql.core.type.DataType.DATE_PERIOD;
import static org.elasticsearch.xpack.esql.type.EsqlDataTypeConverter.dateTimeToString;
import static org.hamcrest.Matchers.contains;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;
Expand Down Expand Up @@ -347,7 +356,7 @@ public void testNoProjection() {
DataType.INTEGER,
DataType.KEYWORD,
DataType.TEXT,
DataType.DATETIME,
DATETIME,
DataType.TEXT,
DataType.KEYWORD,
DataType.INTEGER,
Expand Down Expand Up @@ -2819,6 +2828,147 @@ public void testFunctionNamedParamsAsFunctionArgument1() {
assertEquals(DataType.DOUBLE, ee.dataType());
}

public void testResolveGroupingsBeforeResolvingImplicitReferencesToGroupings() {
var plan = analyze("""
FROM test
| EVAL date = "2025-01-01"::datetime
| STATS c = count(emp_no) BY d = (date == "2025-01-01")
""", "mapping-default.json");

var limit = as(plan, Limit.class);
var agg = as(limit.child(), Aggregate.class);
var aggregates = agg.aggregates();
assertThat(aggregates, hasSize(2));
Alias a = as(aggregates.get(0), Alias.class);
assertEquals("c", a.name());
Count c = as(a.child(), Count.class);
FieldAttribute fa = as(c.field(), FieldAttribute.class);
assertEquals("emp_no", fa.name());
ReferenceAttribute ra = as(aggregates.get(1), ReferenceAttribute.class); // reference in aggregates is resolved
assertEquals("d", ra.name());
List<Expression> groupings = agg.groupings();
assertEquals(1, groupings.size());
a = as(groupings.get(0), Alias.class); // reference in groupings is resolved
assertEquals("d", ra.name());
Equals equals = as(a.child(), Equals.class);
ra = as(equals.left(), ReferenceAttribute.class);
assertEquals("date", ra.name());
Literal literal = as(equals.right(), Literal.class);
assertEquals("2025-01-01T00:00:00.000Z", dateTimeToString(Long.parseLong(literal.value().toString())));
assertEquals(DATETIME, literal.dataType());
}

public void testResolveGroupingsBeforeResolvingExplicitReferencesToGroupings() {
var plan = analyze("""
FROM test
| EVAL date = "2025-01-01"::datetime
| STATS c = count(emp_no), x = d::int + 1 BY d = (date == "2025-01-01")
""", "mapping-default.json");

var limit = as(plan, Limit.class);
var agg = as(limit.child(), Aggregate.class);
var aggregates = agg.aggregates();
assertThat(aggregates, hasSize(3));
Alias a = as(aggregates.get(0), Alias.class);
assertEquals("c", a.name());
Count c = as(a.child(), Count.class);
FieldAttribute fa = as(c.field(), FieldAttribute.class);
assertEquals("emp_no", fa.name());
a = as(aggregates.get(1), Alias.class); // explicit reference to groupings is resolved
assertEquals("x", a.name());
Add add = as(a.child(), Add.class);
ToInteger toInteger = as(add.left(), ToInteger.class);
ReferenceAttribute ra = as(toInteger.field(), ReferenceAttribute.class);
assertEquals("d", ra.name());
ra = as(aggregates.get(2), ReferenceAttribute.class); // reference in aggregates is resolved
assertEquals("d", ra.name());
List<Expression> groupings = agg.groupings();
assertEquals(1, groupings.size());
a = as(groupings.get(0), Alias.class); // reference in groupings is resolved
assertEquals("d", ra.name());
Equals equals = as(a.child(), Equals.class);
ra = as(equals.left(), ReferenceAttribute.class);
assertEquals("date", ra.name());
Literal literal = as(equals.right(), Literal.class);
assertEquals("2025-01-01T00:00:00.000Z", dateTimeToString(Long.parseLong(literal.value().toString())));
assertEquals(DATETIME, literal.dataType());
}

public void testBucketWithIntervalInStringInBothAggregationAndGrouping() {
var plan = analyze("""
FROM test
| STATS c = count(emp_no), b = BUCKET(hire_date, "1 year") + 1 year BY yr = BUCKET(hire_date, "1 year")
""", "mapping-default.json");

var limit = as(plan, Limit.class);
var agg = as(limit.child(), Aggregate.class);
var aggregates = agg.aggregates();
assertThat(aggregates, hasSize(3));
Alias a = as(aggregates.get(0), Alias.class);
assertEquals("c", a.name());
Count c = as(a.child(), Count.class);
FieldAttribute fa = as(c.field(), FieldAttribute.class);
assertEquals("emp_no", fa.name());
a = as(aggregates.get(1), Alias.class); // explicit reference to groupings is resolved
assertEquals("b", a.name());
Add add = as(a.child(), Add.class);
Bucket bucket = as(add.left(), Bucket.class);
fa = as(bucket.field(), FieldAttribute.class);
assertEquals("hire_date", fa.name());
Literal literal = as(bucket.buckets(), Literal.class);
Literal oneYear = new Literal(EMPTY, Period.ofYears(1), DATE_PERIOD);
assertEquals(oneYear, literal);
literal = as(add.right(), Literal.class);
assertEquals(oneYear, literal);
ReferenceAttribute ra = as(aggregates.get(2), ReferenceAttribute.class); // reference in aggregates is resolved
assertEquals("yr", ra.name());
List<Expression> groupings = agg.groupings();
assertEquals(1, groupings.size());
a = as(groupings.get(0), Alias.class); // reference in groupings is resolved
assertEquals("yr", ra.name());
bucket = as(a.child(), Bucket.class);
fa = as(bucket.field(), FieldAttribute.class);
assertEquals("hire_date", fa.name());
literal = as(bucket.buckets(), Literal.class);
assertEquals(oneYear, literal);
}

public void testBucketWithIntervalInStringInGroupingReferencedInAggregation() {
var plan = analyze("""
FROM test
| STATS c = count(emp_no), b = yr + 1 year BY yr = BUCKET(hire_date, "1 year")
""", "mapping-default.json");

var limit = as(plan, Limit.class);
var agg = as(limit.child(), Aggregate.class);
var aggregates = agg.aggregates();
assertThat(aggregates, hasSize(3));
Alias a = as(aggregates.get(0), Alias.class);
assertEquals("c", a.name());
Count c = as(a.child(), Count.class);
FieldAttribute fa = as(c.field(), FieldAttribute.class);
assertEquals("emp_no", fa.name());
a = as(aggregates.get(1), Alias.class); // explicit reference to groupings is resolved
assertEquals("b", a.name());
Add add = as(a.child(), Add.class);
ReferenceAttribute ra = as(add.left(), ReferenceAttribute.class);
assertEquals("yr", ra.name());
Literal oneYear = new Literal(EMPTY, Period.ofYears(1), DATE_PERIOD);
Literal literal = as(add.right(), Literal.class);
assertEquals(oneYear, literal);
ra = as(aggregates.get(2), ReferenceAttribute.class); // reference in aggregates is resolved
assertEquals("yr", ra.name());
List<Expression> groupings = agg.groupings();
assertEquals(1, groupings.size());
a = as(groupings.get(0), Alias.class); // reference in groupings is resolved
assertEquals("yr", ra.name());
Bucket bucket = as(a.child(), Bucket.class);
fa = as(bucket.field(), FieldAttribute.class);
assertEquals("hire_date", fa.name());
literal = as(bucket.buckets(), Literal.class);
assertEquals(oneYear, literal);
}

private void verifyUnsupported(String query, String errorMessage) {
verifyUnsupported(query, errorMessage, "mapping-multi-field-variation.json");
}
Expand Down