Skip to content

Commit 51ad5ab

Browse files
fang-xing-esqlywangd
authored andcommitted
[ES|QL] Resolve groupings in aggregate before resolving references to groupings in the aggregations (elastic#127524)
* resolve groupings before resolving references to groupings in the aggregations
1 parent 015075f commit 51ad5ab

File tree

6 files changed

+242
-20
lines changed

6 files changed

+242
-20
lines changed

docs/changelog/127524.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 127524
2+
summary: Resolve groupings in aggregate before resolving references to groupings in
3+
the aggregations
4+
area: ES|QL
5+
type: bug
6+
issues: []

x-pack/plugin/esql/qa/testFixtures/src/main/resources/bucket.csv-spec

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -850,3 +850,37 @@ c:long | b:date
850850
11 | 1984-05-01T00:00:00.000Z
851851
11 | 1991-01-01T00:00:00.000Z
852852
;
853+
854+
resolveGroupingsBeforeResolvingImplicitReferencesToGroupings
855+
required_capability: resolve_groupings_before_resolving_references_to_groupings_in_aggregations
856+
857+
FROM employees
858+
| STATS c = count(emp_no), b = BUCKET(hire_date, "1 year") + 1 year BY yr = BUCKET(hire_date, "1 year")
859+
| SORT yr
860+
| LIMIT 5
861+
;
862+
863+
c:long | b:datetime | yr:datetime
864+
11 | 1986-01-01T00:00:00.000Z | 1985-01-01T00:00:00.000Z
865+
11 | 1987-01-01T00:00:00.000Z | 1986-01-01T00:00:00.000Z
866+
15 | 1988-01-01T00:00:00.000Z | 1987-01-01T00:00:00.000Z
867+
9 | 1989-01-01T00:00:00.000Z | 1988-01-01T00:00:00.000Z
868+
13 | 1990-01-01T00:00:00.000Z | 1989-01-01T00:00:00.000Z
869+
;
870+
871+
resolveGroupingsBeforeResolvingExplicitReferencesToGroupings
872+
required_capability: resolve_groupings_before_resolving_references_to_groupings_in_aggregations
873+
874+
FROM employees
875+
| STATS c = count(emp_no), b = yr + 1 year BY yr = BUCKET(hire_date, "1 year")
876+
| SORT yr
877+
| LIMIT 5
878+
;
879+
880+
c:long | b:datetime | yr:datetime
881+
11 | 1986-01-01T00:00:00.000Z | 1985-01-01T00:00:00.000Z
882+
11 | 1987-01-01T00:00:00.000Z | 1986-01-01T00:00:00.000Z
883+
15 | 1988-01-01T00:00:00.000Z | 1987-01-01T00:00:00.000Z
884+
9 | 1989-01-01T00:00:00.000Z | 1988-01-01T00:00:00.000Z
885+
13 | 1990-01-01T00:00:00.000Z | 1989-01-01T00:00:00.000Z
886+
;

x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3097,3 +3097,27 @@ ROW a = [1,2,3], b = 5
30973097
STD_DEV(a):double | STD_DEV(b):double
30983098
0.816496580927726 | 0.0
30993099
;
3100+
3101+
resolveGroupingsBeforeResolvingImplicitReferencesToGroupings
3102+
required_capability: resolve_groupings_before_resolving_references_to_groupings_in_aggregations
3103+
3104+
FROM employees
3105+
| EVAL date = "2025-01-01"::datetime
3106+
| stats m = MAX(hire_date) BY d = (date == "2025-01-01")
3107+
;
3108+
3109+
m:datetime | d:boolean
3110+
1999-04-30T00:00:00.000Z | true
3111+
;
3112+
3113+
resolveGroupingsBeforeResolvingExplicitReferencesToGroupings
3114+
required_capability: resolve_groupings_before_resolving_references_to_groupings_in_aggregations
3115+
3116+
FROM employees
3117+
| EVAL date = "2025-01-01"::datetime
3118+
| stats m = MAX(hire_date), x = d::int + 1 BY d = (date == "2025-01-01")
3119+
;
3120+
3121+
m:datetime | x:integer | d:boolean
3122+
1999-04-30T00:00:00.000Z | 2 | true
3123+
;

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1066,7 +1066,10 @@ public enum Cap {
10661066
*/
10671067
FIRST_OVER_TIME(Build.current().isSnapshot()),
10681068

1069-
;
1069+
/**
1070+
* Resolve groupings before resolving references to groupings in the aggregations.
1071+
*/
1072+
RESOLVE_GROUPINGS_BEFORE_RESOLVING_REFERENCES_TO_GROUPINGS_IN_AGGREGATIONS;
10701073

10711074
private final boolean enabled;
10721075

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java

Lines changed: 25 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -173,13 +173,8 @@ public class Analyzer extends ParameterizedRuleExecutor<LogicalPlan, AnalyzerCon
173173
),
174174
new Batch<>(
175175
"Resolution",
176-
/*
177-
* ImplicitCasting must be before ResolveRefs. Because a reference is created for a Bucket in Aggregate's aggregates,
178-
* resolving this reference before implicit casting may cause this reference to have customMessage=true, it prevents further
179-
* attempts to resolve this reference.
180-
*/
181-
new ImplicitCasting(),
182176
new ResolveRefs(),
177+
new ImplicitCasting(),
183178
new ResolveUnionTypes() // Must be after ResolveRefs, so union types can be found
184179
),
185180
new Batch<>("Finish Analysis", Limiter.ONCE, new AddImplicitLimit(), new AddImplicitForkLimit(), new UnionTypesCleanup())
@@ -574,7 +569,7 @@ private Aggregate resolveAggregate(Aggregate aggregate, List<Attribute> children
574569
}
575570
}
576571

577-
if (Resolvables.resolved(groupings) == false || (Resolvables.resolved(aggregates) == false)) {
572+
if (Resolvables.resolved(groupings) == false || Resolvables.resolved(aggregates) == false) {
578573
ArrayList<Attribute> resolved = new ArrayList<>();
579574
for (Expression e : groupings) {
580575
Attribute attr = Expressions.attribute(e);
@@ -585,17 +580,29 @@ private Aggregate resolveAggregate(Aggregate aggregate, List<Attribute> children
585580
List<Attribute> resolvedList = NamedExpressions.mergeOutputAttributes(resolved, childrenOutput);
586581

587582
List<NamedExpression> newAggregates = new ArrayList<>();
588-
for (NamedExpression ag : aggregate.aggregates()) {
589-
var agg = (NamedExpression) ag.transformUp(UnresolvedAttribute.class, ua -> {
590-
Expression ne = ua;
591-
Attribute maybeResolved = maybeResolveAttribute(ua, resolvedList);
592-
if (maybeResolved != null) {
593-
changed.set(true);
594-
ne = maybeResolved;
595-
}
596-
return ne;
597-
});
598-
newAggregates.add(agg);
583+
// If the groupings are not resolved, skip the resolution of the references to groupings in the aggregates, resolve the
584+
// aggregations that do not reference to groupings, so that the fields/attributes referenced by the aggregations can be
585+
// resolved, and verifier doesn't report field/reference/column not found errors for them.
586+
boolean groupingResolved = Resolvables.resolved(groupings);
587+
int size = groupingResolved ? aggregates.size() : aggregates.size() - groupings.size();
588+
for (int i = 0; i < aggregates.size(); i++) {
589+
NamedExpression maybeResolvedAgg = aggregates.get(i);
590+
if (i < size) { // Skip resolving references to groupings in the aggregations if the groupings are not resolved yet.
591+
maybeResolvedAgg = (NamedExpression) maybeResolvedAgg.transformUp(UnresolvedAttribute.class, ua -> {
592+
Expression ne = ua;
593+
Attribute maybeResolved = maybeResolveAttribute(ua, resolvedList);
594+
// An item in aggregations can reference to groupings explicitly, if groupings are not resolved yet and
595+
// maybeResolved is not resolved, return the original UnresolvedAttribute, so that it has another chance
596+
// to get resolved in the next iteration.
597+
// For example STATS c = count(emp_no), x = d::int + 1 BY d = (date == "2025-01-01")
598+
if (groupingResolved || maybeResolved.resolved()) {
599+
changed.set(true);
600+
ne = maybeResolved;
601+
}
602+
return ne;
603+
});
604+
}
605+
newAggregates.add(maybeResolvedAgg);
599606
}
600607

601608
// TODO: remove this when Stats interface is removed

x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java

Lines changed: 149 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,11 @@
4747
import org.elasticsearch.xpack.esql.expression.function.fulltext.MatchOperator;
4848
import org.elasticsearch.xpack.esql.expression.function.fulltext.MultiMatch;
4949
import org.elasticsearch.xpack.esql.expression.function.fulltext.QueryString;
50+
import org.elasticsearch.xpack.esql.expression.function.grouping.Bucket;
51+
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToInteger;
5052
import org.elasticsearch.xpack.esql.expression.function.scalar.string.Concat;
5153
import org.elasticsearch.xpack.esql.expression.function.scalar.string.Substring;
54+
import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.Add;
5255
import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.Equals;
5356
import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.GreaterThan;
5457
import org.elasticsearch.xpack.esql.index.EsIndex;
@@ -80,6 +83,7 @@
8083
import org.elasticsearch.xpack.esql.session.IndexResolver;
8184

8285
import java.io.IOException;
86+
import java.time.Period;
8387
import java.util.ArrayList;
8488
import java.util.List;
8589
import java.util.Map;
@@ -110,6 +114,9 @@
110114
import static org.elasticsearch.xpack.esql.analysis.AnalyzerTestUtils.randomValueOtherThanTest;
111115
import static org.elasticsearch.xpack.esql.analysis.AnalyzerTestUtils.tsdbIndexResolution;
112116
import static org.elasticsearch.xpack.esql.core.tree.Source.EMPTY;
117+
import static org.elasticsearch.xpack.esql.core.type.DataType.DATETIME;
118+
import static org.elasticsearch.xpack.esql.core.type.DataType.DATE_PERIOD;
119+
import static org.elasticsearch.xpack.esql.type.EsqlDataTypeConverter.dateTimeToString;
113120
import static org.hamcrest.Matchers.contains;
114121
import static org.hamcrest.Matchers.containsString;
115122
import static org.hamcrest.Matchers.empty;
@@ -368,7 +375,7 @@ public void testNoProjection() {
368375
DataType.INTEGER,
369376
DataType.KEYWORD,
370377
DataType.TEXT,
371-
DataType.DATETIME,
378+
DATETIME,
372379
DataType.TEXT,
373380
DataType.KEYWORD,
374381
DataType.INTEGER,
@@ -3789,6 +3796,147 @@ public void testResolveCompletionOutputField() {
37893796
assertThat(getAttributeByName(esRelation.output(), "description"), not(equalTo(completion.targetField())));
37903797
}
37913798

3799+
public void testResolveGroupingsBeforeResolvingImplicitReferencesToGroupings() {
3800+
var plan = analyze("""
3801+
FROM test
3802+
| EVAL date = "2025-01-01"::datetime
3803+
| STATS c = count(emp_no) BY d = (date == "2025-01-01")
3804+
""", "mapping-default.json");
3805+
3806+
var limit = as(plan, Limit.class);
3807+
var agg = as(limit.child(), Aggregate.class);
3808+
var aggregates = agg.aggregates();
3809+
assertThat(aggregates, hasSize(2));
3810+
Alias a = as(aggregates.get(0), Alias.class);
3811+
assertEquals("c", a.name());
3812+
Count c = as(a.child(), Count.class);
3813+
FieldAttribute fa = as(c.field(), FieldAttribute.class);
3814+
assertEquals("emp_no", fa.name());
3815+
ReferenceAttribute ra = as(aggregates.get(1), ReferenceAttribute.class); // reference in aggregates is resolved
3816+
assertEquals("d", ra.name());
3817+
List<Expression> groupings = agg.groupings();
3818+
assertEquals(1, groupings.size());
3819+
a = as(groupings.get(0), Alias.class); // reference in groupings is resolved
3820+
assertEquals("d", ra.name());
3821+
Equals equals = as(a.child(), Equals.class);
3822+
ra = as(equals.left(), ReferenceAttribute.class);
3823+
assertEquals("date", ra.name());
3824+
Literal literal = as(equals.right(), Literal.class);
3825+
assertEquals("2025-01-01T00:00:00.000Z", dateTimeToString(Long.parseLong(literal.value().toString())));
3826+
assertEquals(DATETIME, literal.dataType());
3827+
}
3828+
3829+
public void testResolveGroupingsBeforeResolvingExplicitReferencesToGroupings() {
3830+
var plan = analyze("""
3831+
FROM test
3832+
| EVAL date = "2025-01-01"::datetime
3833+
| STATS c = count(emp_no), x = d::int + 1 BY d = (date == "2025-01-01")
3834+
""", "mapping-default.json");
3835+
3836+
var limit = as(plan, Limit.class);
3837+
var agg = as(limit.child(), Aggregate.class);
3838+
var aggregates = agg.aggregates();
3839+
assertThat(aggregates, hasSize(3));
3840+
Alias a = as(aggregates.get(0), Alias.class);
3841+
assertEquals("c", a.name());
3842+
Count c = as(a.child(), Count.class);
3843+
FieldAttribute fa = as(c.field(), FieldAttribute.class);
3844+
assertEquals("emp_no", fa.name());
3845+
a = as(aggregates.get(1), Alias.class); // explicit reference to groupings is resolved
3846+
assertEquals("x", a.name());
3847+
Add add = as(a.child(), Add.class);
3848+
ToInteger toInteger = as(add.left(), ToInteger.class);
3849+
ReferenceAttribute ra = as(toInteger.field(), ReferenceAttribute.class);
3850+
assertEquals("d", ra.name());
3851+
ra = as(aggregates.get(2), ReferenceAttribute.class); // reference in aggregates is resolved
3852+
assertEquals("d", ra.name());
3853+
List<Expression> groupings = agg.groupings();
3854+
assertEquals(1, groupings.size());
3855+
a = as(groupings.get(0), Alias.class); // reference in groupings is resolved
3856+
assertEquals("d", ra.name());
3857+
Equals equals = as(a.child(), Equals.class);
3858+
ra = as(equals.left(), ReferenceAttribute.class);
3859+
assertEquals("date", ra.name());
3860+
Literal literal = as(equals.right(), Literal.class);
3861+
assertEquals("2025-01-01T00:00:00.000Z", dateTimeToString(Long.parseLong(literal.value().toString())));
3862+
assertEquals(DATETIME, literal.dataType());
3863+
}
3864+
3865+
public void testBucketWithIntervalInStringInBothAggregationAndGrouping() {
3866+
var plan = analyze("""
3867+
FROM test
3868+
| STATS c = count(emp_no), b = BUCKET(hire_date, "1 year") + 1 year BY yr = BUCKET(hire_date, "1 year")
3869+
""", "mapping-default.json");
3870+
3871+
var limit = as(plan, Limit.class);
3872+
var agg = as(limit.child(), Aggregate.class);
3873+
var aggregates = agg.aggregates();
3874+
assertThat(aggregates, hasSize(3));
3875+
Alias a = as(aggregates.get(0), Alias.class);
3876+
assertEquals("c", a.name());
3877+
Count c = as(a.child(), Count.class);
3878+
FieldAttribute fa = as(c.field(), FieldAttribute.class);
3879+
assertEquals("emp_no", fa.name());
3880+
a = as(aggregates.get(1), Alias.class); // explicit reference to groupings is resolved
3881+
assertEquals("b", a.name());
3882+
Add add = as(a.child(), Add.class);
3883+
Bucket bucket = as(add.left(), Bucket.class);
3884+
fa = as(bucket.field(), FieldAttribute.class);
3885+
assertEquals("hire_date", fa.name());
3886+
Literal literal = as(bucket.buckets(), Literal.class);
3887+
Literal oneYear = new Literal(EMPTY, Period.ofYears(1), DATE_PERIOD);
3888+
assertEquals(oneYear, literal);
3889+
literal = as(add.right(), Literal.class);
3890+
assertEquals(oneYear, literal);
3891+
ReferenceAttribute ra = as(aggregates.get(2), ReferenceAttribute.class); // reference in aggregates is resolved
3892+
assertEquals("yr", ra.name());
3893+
List<Expression> groupings = agg.groupings();
3894+
assertEquals(1, groupings.size());
3895+
a = as(groupings.get(0), Alias.class); // reference in groupings is resolved
3896+
assertEquals("yr", ra.name());
3897+
bucket = as(a.child(), Bucket.class);
3898+
fa = as(bucket.field(), FieldAttribute.class);
3899+
assertEquals("hire_date", fa.name());
3900+
literal = as(bucket.buckets(), Literal.class);
3901+
assertEquals(oneYear, literal);
3902+
}
3903+
3904+
public void testBucketWithIntervalInStringInGroupingReferencedInAggregation() {
3905+
var plan = analyze("""
3906+
FROM test
3907+
| STATS c = count(emp_no), b = yr + 1 year BY yr = BUCKET(hire_date, "1 year")
3908+
""", "mapping-default.json");
3909+
3910+
var limit = as(plan, Limit.class);
3911+
var agg = as(limit.child(), Aggregate.class);
3912+
var aggregates = agg.aggregates();
3913+
assertThat(aggregates, hasSize(3));
3914+
Alias a = as(aggregates.get(0), Alias.class);
3915+
assertEquals("c", a.name());
3916+
Count c = as(a.child(), Count.class);
3917+
FieldAttribute fa = as(c.field(), FieldAttribute.class);
3918+
assertEquals("emp_no", fa.name());
3919+
a = as(aggregates.get(1), Alias.class); // explicit reference to groupings is resolved
3920+
assertEquals("b", a.name());
3921+
Add add = as(a.child(), Add.class);
3922+
ReferenceAttribute ra = as(add.left(), ReferenceAttribute.class);
3923+
assertEquals("yr", ra.name());
3924+
Literal oneYear = new Literal(EMPTY, Period.ofYears(1), DATE_PERIOD);
3925+
Literal literal = as(add.right(), Literal.class);
3926+
assertEquals(oneYear, literal);
3927+
ra = as(aggregates.get(2), ReferenceAttribute.class); // reference in aggregates is resolved
3928+
assertEquals("yr", ra.name());
3929+
List<Expression> groupings = agg.groupings();
3930+
assertEquals(1, groupings.size());
3931+
a = as(groupings.get(0), Alias.class); // reference in groupings is resolved
3932+
assertEquals("yr", ra.name());
3933+
Bucket bucket = as(a.child(), Bucket.class);
3934+
fa = as(bucket.field(), FieldAttribute.class);
3935+
assertEquals("hire_date", fa.name());
3936+
literal = as(bucket.buckets(), Literal.class);
3937+
assertEquals(oneYear, literal);
3938+
}
3939+
37923940
@Override
37933941
protected IndexAnalyzers createDefaultIndexAnalyzers() {
37943942
return super.createDefaultIndexAnalyzers();

0 commit comments

Comments
 (0)