diff --git a/docs/changelog/127524.yaml b/docs/changelog/127524.yaml new file mode 100644 index 0000000000000..d11599ddcde58 --- /dev/null +++ b/docs/changelog/127524.yaml @@ -0,0 +1,6 @@ +pr: 127524 +summary: Resolve groupings in aggregate before resolving references to groupings in + the aggregations +area: ES|QL +type: bug +issues: [] diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/bucket.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/bucket.csv-spec index f860b1518750c..49b16baf30f58 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/bucket.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/bucket.csv-spec @@ -850,3 +850,37 @@ c:long | b:date 11 | 1984-05-01T00:00:00.000Z 11 | 1991-01-01T00:00:00.000Z ; + +resolveGroupingsBeforeResolvingImplicitReferencesToGroupings +required_capability: resolve_groupings_before_resolving_references_to_groupings_in_aggregations + +FROM employees +| STATS c = count(emp_no), b = BUCKET(hire_date, "1 year") + 1 year BY yr = BUCKET(hire_date, "1 year") +| SORT yr +| LIMIT 5 +; + +c:long | b:datetime | yr:datetime +11 | 1986-01-01T00:00:00.000Z | 1985-01-01T00:00:00.000Z +11 | 1987-01-01T00:00:00.000Z | 1986-01-01T00:00:00.000Z +15 | 1988-01-01T00:00:00.000Z | 1987-01-01T00:00:00.000Z +9 | 1989-01-01T00:00:00.000Z | 1988-01-01T00:00:00.000Z +13 | 1990-01-01T00:00:00.000Z | 1989-01-01T00:00:00.000Z +; + +resolveGroupingsBeforeResolvingExplicitReferencesToGroupings +required_capability: resolve_groupings_before_resolving_references_to_groupings_in_aggregations + +FROM employees +| STATS c = count(emp_no), b = yr + 1 year BY yr = BUCKET(hire_date, "1 year") +| SORT yr +| LIMIT 5 +; + +c:long | b:datetime | yr:datetime +11 | 1986-01-01T00:00:00.000Z | 1985-01-01T00:00:00.000Z +11 | 1987-01-01T00:00:00.000Z | 1986-01-01T00:00:00.000Z +15 | 1988-01-01T00:00:00.000Z | 1987-01-01T00:00:00.000Z +9 | 1989-01-01T00:00:00.000Z | 1988-01-01T00:00:00.000Z +13 | 1990-01-01T00:00:00.000Z | 1989-01-01T00:00:00.000Z +; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec index 610baeca91c51..86dbf4734da02 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec @@ -3097,3 +3097,27 @@ ROW a = [1,2,3], b = 5 STD_DEV(a):double | STD_DEV(b):double 0.816496580927726 | 0.0 ; + +resolveGroupingsBeforeResolvingImplicitReferencesToGroupings +required_capability: resolve_groupings_before_resolving_references_to_groupings_in_aggregations + +FROM employees +| EVAL date = "2025-01-01"::datetime +| stats m = MAX(hire_date) BY d = (date == "2025-01-01") +; + +m:datetime | d:boolean +1999-04-30T00:00:00.000Z | true +; + +resolveGroupingsBeforeResolvingExplicitReferencesToGroupings +required_capability: resolve_groupings_before_resolving_references_to_groupings_in_aggregations + +FROM employees +| EVAL date = "2025-01-01"::datetime +| stats m = MAX(hire_date), x = d::int + 1 BY d = (date == "2025-01-01") +; + +m:datetime | x:integer | d:boolean +1999-04-30T00:00:00.000Z | 2 | true +; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index 186d288810e70..fe22f1737a7f9 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -1066,7 +1066,10 @@ public enum Cap { */ FIRST_OVER_TIME(Build.current().isSnapshot()), - ; + /** + * Resolve groupings before resolving references to groupings in the aggregations. + */ + RESOLVE_GROUPINGS_BEFORE_RESOLVING_REFERENCES_TO_GROUPINGS_IN_AGGREGATIONS; private final boolean enabled; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java index 1e0577193cab2..3e2ffa706b441 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java @@ -173,13 +173,8 @@ public class Analyzer extends ParameterizedRuleExecutor( "Resolution", - /* - * ImplicitCasting must be before ResolveRefs. Because a reference is created for a Bucket in Aggregate's aggregates, - * resolving this reference before implicit casting may cause this reference to have customMessage=true, it prevents further - * attempts to resolve this reference. - */ - new ImplicitCasting(), new ResolveRefs(), + new ImplicitCasting(), new ResolveUnionTypes() // Must be after ResolveRefs, so union types can be found ), new Batch<>("Finish Analysis", Limiter.ONCE, new AddImplicitLimit(), new AddImplicitForkLimit(), new UnionTypesCleanup()) @@ -574,7 +569,7 @@ private Aggregate resolveAggregate(Aggregate aggregate, List children } } - if (Resolvables.resolved(groupings) == false || (Resolvables.resolved(aggregates) == false)) { + if (Resolvables.resolved(groupings) == false || Resolvables.resolved(aggregates) == false) { ArrayList resolved = new ArrayList<>(); for (Expression e : groupings) { Attribute attr = Expressions.attribute(e); @@ -585,17 +580,29 @@ private Aggregate resolveAggregate(Aggregate aggregate, List children List resolvedList = NamedExpressions.mergeOutputAttributes(resolved, childrenOutput); List newAggregates = new ArrayList<>(); - for (NamedExpression ag : aggregate.aggregates()) { - var agg = (NamedExpression) ag.transformUp(UnresolvedAttribute.class, ua -> { - Expression ne = ua; - Attribute maybeResolved = maybeResolveAttribute(ua, resolvedList); - if (maybeResolved != null) { - changed.set(true); - ne = maybeResolved; - } - return ne; - }); - newAggregates.add(agg); + // If the groupings are not resolved, skip the resolution of the references to groupings in the aggregates, resolve the + // aggregations that do not reference to groupings, so that the fields/attributes referenced by the aggregations can be + // resolved, and verifier doesn't report field/reference/column not found errors for them. + boolean groupingResolved = Resolvables.resolved(groupings); + int size = groupingResolved ? aggregates.size() : aggregates.size() - groupings.size(); + for (int i = 0; i < aggregates.size(); i++) { + NamedExpression maybeResolvedAgg = aggregates.get(i); + if (i < size) { // Skip resolving references to groupings in the aggregations if the groupings are not resolved yet. + maybeResolvedAgg = (NamedExpression) maybeResolvedAgg.transformUp(UnresolvedAttribute.class, ua -> { + Expression ne = ua; + Attribute maybeResolved = maybeResolveAttribute(ua, resolvedList); + // An item in aggregations can reference to groupings explicitly, if groupings are not resolved yet and + // maybeResolved is not resolved, return the original UnresolvedAttribute, so that it has another chance + // to get resolved in the next iteration. + // For example STATS c = count(emp_no), x = d::int + 1 BY d = (date == "2025-01-01") + if (groupingResolved || maybeResolved.resolved()) { + changed.set(true); + ne = maybeResolved; + } + return ne; + }); + } + newAggregates.add(maybeResolvedAgg); } // TODO: remove this when Stats interface is removed diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java index 3a5f133702f57..f3afa24969f33 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java @@ -47,8 +47,11 @@ import org.elasticsearch.xpack.esql.expression.function.fulltext.MatchOperator; import org.elasticsearch.xpack.esql.expression.function.fulltext.MultiMatch; import org.elasticsearch.xpack.esql.expression.function.fulltext.QueryString; +import org.elasticsearch.xpack.esql.expression.function.grouping.Bucket; +import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToInteger; import org.elasticsearch.xpack.esql.expression.function.scalar.string.Concat; import org.elasticsearch.xpack.esql.expression.function.scalar.string.Substring; +import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.Add; import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.Equals; import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.GreaterThan; import org.elasticsearch.xpack.esql.index.EsIndex; @@ -80,6 +83,7 @@ import org.elasticsearch.xpack.esql.session.IndexResolver; import java.io.IOException; +import java.time.Period; import java.util.ArrayList; import java.util.List; import java.util.Map; @@ -110,6 +114,9 @@ import static org.elasticsearch.xpack.esql.analysis.AnalyzerTestUtils.randomValueOtherThanTest; import static org.elasticsearch.xpack.esql.analysis.AnalyzerTestUtils.tsdbIndexResolution; import static org.elasticsearch.xpack.esql.core.tree.Source.EMPTY; +import static org.elasticsearch.xpack.esql.core.type.DataType.DATETIME; +import static org.elasticsearch.xpack.esql.core.type.DataType.DATE_PERIOD; +import static org.elasticsearch.xpack.esql.type.EsqlDataTypeConverter.dateTimeToString; import static org.hamcrest.Matchers.contains; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.empty; @@ -368,7 +375,7 @@ public void testNoProjection() { DataType.INTEGER, DataType.KEYWORD, DataType.TEXT, - DataType.DATETIME, + DATETIME, DataType.TEXT, DataType.KEYWORD, DataType.INTEGER, @@ -3789,6 +3796,147 @@ public void testResolveCompletionOutputField() { assertThat(getAttributeByName(esRelation.output(), "description"), not(equalTo(completion.targetField()))); } + public void testResolveGroupingsBeforeResolvingImplicitReferencesToGroupings() { + var plan = analyze(""" + FROM test + | EVAL date = "2025-01-01"::datetime + | STATS c = count(emp_no) BY d = (date == "2025-01-01") + """, "mapping-default.json"); + + var limit = as(plan, Limit.class); + var agg = as(limit.child(), Aggregate.class); + var aggregates = agg.aggregates(); + assertThat(aggregates, hasSize(2)); + Alias a = as(aggregates.get(0), Alias.class); + assertEquals("c", a.name()); + Count c = as(a.child(), Count.class); + FieldAttribute fa = as(c.field(), FieldAttribute.class); + assertEquals("emp_no", fa.name()); + ReferenceAttribute ra = as(aggregates.get(1), ReferenceAttribute.class); // reference in aggregates is resolved + assertEquals("d", ra.name()); + List groupings = agg.groupings(); + assertEquals(1, groupings.size()); + a = as(groupings.get(0), Alias.class); // reference in groupings is resolved + assertEquals("d", ra.name()); + Equals equals = as(a.child(), Equals.class); + ra = as(equals.left(), ReferenceAttribute.class); + assertEquals("date", ra.name()); + Literal literal = as(equals.right(), Literal.class); + assertEquals("2025-01-01T00:00:00.000Z", dateTimeToString(Long.parseLong(literal.value().toString()))); + assertEquals(DATETIME, literal.dataType()); + } + + public void testResolveGroupingsBeforeResolvingExplicitReferencesToGroupings() { + var plan = analyze(""" + FROM test + | EVAL date = "2025-01-01"::datetime + | STATS c = count(emp_no), x = d::int + 1 BY d = (date == "2025-01-01") + """, "mapping-default.json"); + + var limit = as(plan, Limit.class); + var agg = as(limit.child(), Aggregate.class); + var aggregates = agg.aggregates(); + assertThat(aggregates, hasSize(3)); + Alias a = as(aggregates.get(0), Alias.class); + assertEquals("c", a.name()); + Count c = as(a.child(), Count.class); + FieldAttribute fa = as(c.field(), FieldAttribute.class); + assertEquals("emp_no", fa.name()); + a = as(aggregates.get(1), Alias.class); // explicit reference to groupings is resolved + assertEquals("x", a.name()); + Add add = as(a.child(), Add.class); + ToInteger toInteger = as(add.left(), ToInteger.class); + ReferenceAttribute ra = as(toInteger.field(), ReferenceAttribute.class); + assertEquals("d", ra.name()); + ra = as(aggregates.get(2), ReferenceAttribute.class); // reference in aggregates is resolved + assertEquals("d", ra.name()); + List groupings = agg.groupings(); + assertEquals(1, groupings.size()); + a = as(groupings.get(0), Alias.class); // reference in groupings is resolved + assertEquals("d", ra.name()); + Equals equals = as(a.child(), Equals.class); + ra = as(equals.left(), ReferenceAttribute.class); + assertEquals("date", ra.name()); + Literal literal = as(equals.right(), Literal.class); + assertEquals("2025-01-01T00:00:00.000Z", dateTimeToString(Long.parseLong(literal.value().toString()))); + assertEquals(DATETIME, literal.dataType()); + } + + public void testBucketWithIntervalInStringInBothAggregationAndGrouping() { + var plan = analyze(""" + FROM test + | STATS c = count(emp_no), b = BUCKET(hire_date, "1 year") + 1 year BY yr = BUCKET(hire_date, "1 year") + """, "mapping-default.json"); + + var limit = as(plan, Limit.class); + var agg = as(limit.child(), Aggregate.class); + var aggregates = agg.aggregates(); + assertThat(aggregates, hasSize(3)); + Alias a = as(aggregates.get(0), Alias.class); + assertEquals("c", a.name()); + Count c = as(a.child(), Count.class); + FieldAttribute fa = as(c.field(), FieldAttribute.class); + assertEquals("emp_no", fa.name()); + a = as(aggregates.get(1), Alias.class); // explicit reference to groupings is resolved + assertEquals("b", a.name()); + Add add = as(a.child(), Add.class); + Bucket bucket = as(add.left(), Bucket.class); + fa = as(bucket.field(), FieldAttribute.class); + assertEquals("hire_date", fa.name()); + Literal literal = as(bucket.buckets(), Literal.class); + Literal oneYear = new Literal(EMPTY, Period.ofYears(1), DATE_PERIOD); + assertEquals(oneYear, literal); + literal = as(add.right(), Literal.class); + assertEquals(oneYear, literal); + ReferenceAttribute ra = as(aggregates.get(2), ReferenceAttribute.class); // reference in aggregates is resolved + assertEquals("yr", ra.name()); + List groupings = agg.groupings(); + assertEquals(1, groupings.size()); + a = as(groupings.get(0), Alias.class); // reference in groupings is resolved + assertEquals("yr", ra.name()); + bucket = as(a.child(), Bucket.class); + fa = as(bucket.field(), FieldAttribute.class); + assertEquals("hire_date", fa.name()); + literal = as(bucket.buckets(), Literal.class); + assertEquals(oneYear, literal); + } + + public void testBucketWithIntervalInStringInGroupingReferencedInAggregation() { + var plan = analyze(""" + FROM test + | STATS c = count(emp_no), b = yr + 1 year BY yr = BUCKET(hire_date, "1 year") + """, "mapping-default.json"); + + var limit = as(plan, Limit.class); + var agg = as(limit.child(), Aggregate.class); + var aggregates = agg.aggregates(); + assertThat(aggregates, hasSize(3)); + Alias a = as(aggregates.get(0), Alias.class); + assertEquals("c", a.name()); + Count c = as(a.child(), Count.class); + FieldAttribute fa = as(c.field(), FieldAttribute.class); + assertEquals("emp_no", fa.name()); + a = as(aggregates.get(1), Alias.class); // explicit reference to groupings is resolved + assertEquals("b", a.name()); + Add add = as(a.child(), Add.class); + ReferenceAttribute ra = as(add.left(), ReferenceAttribute.class); + assertEquals("yr", ra.name()); + Literal oneYear = new Literal(EMPTY, Period.ofYears(1), DATE_PERIOD); + Literal literal = as(add.right(), Literal.class); + assertEquals(oneYear, literal); + ra = as(aggregates.get(2), ReferenceAttribute.class); // reference in aggregates is resolved + assertEquals("yr", ra.name()); + List groupings = agg.groupings(); + assertEquals(1, groupings.size()); + a = as(groupings.get(0), Alias.class); // reference in groupings is resolved + assertEquals("yr", ra.name()); + Bucket bucket = as(a.child(), Bucket.class); + fa = as(bucket.field(), FieldAttribute.class); + assertEquals("hire_date", fa.name()); + literal = as(bucket.buckets(), Literal.class); + assertEquals(oneYear, literal); + } + @Override protected IndexAnalyzers createDefaultIndexAnalyzers() { return super.createDefaultIndexAnalyzers();