Skip to content

Commit 17e6b6e

Browse files
[ES|QL] Resolve groupings in aggregate before resolving references to groupings in the aggregations (#127524) (#127760)
* resolve groupings before resolving references to groupings in the aggregations (cherry picked from commit 4e4b89e) # Conflicts: # x-pack/plugin/esql/qa/testFixtures/src/main/resources/bucket.csv-spec # x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java # x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java
1 parent 1eff103 commit 17e6b6e

File tree

6 files changed

+246
-20
lines changed

6 files changed

+246
-20
lines changed

docs/changelog/127524.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 127524
2+
summary: Resolve groupings in aggregate before resolving references to groupings in
3+
the aggregations
4+
area: ES|QL
5+
type: bug
6+
issues: []

x-pack/plugin/esql/qa/testFixtures/src/main/resources/bucket.csv-spec

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -815,3 +815,37 @@ c:long |b:date
815815
0 |null
816816
1 |1965-01-01T00:00:00.000Z
817817
;
818+
819+
resolveGroupingsBeforeResolvingImplicitReferencesToGroupings
820+
required_capability: resolve_groupings_before_resolving_references_to_groupings_in_aggregations
821+
822+
FROM employees
823+
| STATS c = count(emp_no), b = BUCKET(hire_date, "1 year") + 1 year BY yr = BUCKET(hire_date, "1 year")
824+
| SORT yr
825+
| LIMIT 5
826+
;
827+
828+
c:long | b:datetime | yr:datetime
829+
11 | 1986-01-01T00:00:00.000Z | 1985-01-01T00:00:00.000Z
830+
11 | 1987-01-01T00:00:00.000Z | 1986-01-01T00:00:00.000Z
831+
15 | 1988-01-01T00:00:00.000Z | 1987-01-01T00:00:00.000Z
832+
9 | 1989-01-01T00:00:00.000Z | 1988-01-01T00:00:00.000Z
833+
13 | 1990-01-01T00:00:00.000Z | 1989-01-01T00:00:00.000Z
834+
;
835+
836+
resolveGroupingsBeforeResolvingExplicitReferencesToGroupings
837+
required_capability: resolve_groupings_before_resolving_references_to_groupings_in_aggregations
838+
839+
FROM employees
840+
| STATS c = count(emp_no), b = yr + 1 year BY yr = BUCKET(hire_date, "1 year")
841+
| SORT yr
842+
| LIMIT 5
843+
;
844+
845+
c:long | b:datetime | yr:datetime
846+
11 | 1986-01-01T00:00:00.000Z | 1985-01-01T00:00:00.000Z
847+
11 | 1987-01-01T00:00:00.000Z | 1986-01-01T00:00:00.000Z
848+
15 | 1988-01-01T00:00:00.000Z | 1987-01-01T00:00:00.000Z
849+
9 | 1989-01-01T00:00:00.000Z | 1988-01-01T00:00:00.000Z
850+
13 | 1990-01-01T00:00:00.000Z | 1989-01-01T00:00:00.000Z
851+
;

x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3097,3 +3097,27 @@ ROW a = [1,2,3], b = 5
30973097
STD_DEV(a):double | STD_DEV(b):double
30983098
0.816496580927726 | 0.0
30993099
;
3100+
3101+
resolveGroupingsBeforeResolvingImplicitReferencesToGroupings
3102+
required_capability: resolve_groupings_before_resolving_references_to_groupings_in_aggregations
3103+
3104+
FROM employees
3105+
| EVAL date = "2025-01-01"::datetime
3106+
| stats m = MAX(hire_date) BY d = (date == "2025-01-01")
3107+
;
3108+
3109+
m:datetime | d:boolean
3110+
1999-04-30T00:00:00.000Z | true
3111+
;
3112+
3113+
resolveGroupingsBeforeResolvingExplicitReferencesToGroupings
3114+
required_capability: resolve_groupings_before_resolving_references_to_groupings_in_aggregations
3115+
3116+
FROM employees
3117+
| EVAL date = "2025-01-01"::datetime
3118+
| stats m = MAX(hire_date), x = d::int + 1 BY d = (date == "2025-01-01")
3119+
;
3120+
3121+
m:datetime | x:integer | d:boolean
3122+
1999-04-30T00:00:00.000Z | 2 | true
3123+
;

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -838,7 +838,12 @@ public enum Cap {
838838
* During resolution (pre-analysis) we have to consider that joins or enriches can override EVALuated values
839839
* https://github.com/elastic/elasticsearch/issues/126419
840840
*/
841-
FIX_JOIN_MASKING_EVAL;
841+
FIX_JOIN_MASKING_EVAL,
842+
843+
/**
844+
* Resolve groupings before resolving references to groupings in the aggregations.
845+
*/
846+
RESOLVE_GROUPINGS_BEFORE_RESOLVING_REFERENCES_TO_GROUPINGS_IN_AGGREGATIONS;
842847

843848
private final boolean enabled;
844849

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java

Lines changed: 25 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -161,13 +161,8 @@ public class Analyzer extends ParameterizedRuleExecutor<LogicalPlan, AnalyzerCon
161161
);
162162
var resolution = new Batch<>(
163163
"Resolution",
164-
/*
165-
* ImplicitCasting must be before ResolveRefs. Because a reference is created for a Bucket in Aggregate's aggregates,
166-
* resolving this reference before implicit casting may cause this reference to have customMessage=true, it prevents further
167-
* attempts to resolve this reference.
168-
*/
169-
new ImplicitCasting(),
170164
new ResolveRefs(),
165+
new ImplicitCasting(),
171166
new ResolveUnionTypes() // Must be after ResolveRefs, so union types can be found
172167
);
173168
var finish = new Batch<>("Finish Analysis", Limiter.ONCE, new AddImplicitLimit(), new UnionTypesCleanup());
@@ -511,7 +506,7 @@ private Aggregate resolveAggregate(Aggregate aggregate, List<Attribute> children
511506
}
512507
}
513508

514-
if (Resolvables.resolved(groupings) == false || (Resolvables.resolved(aggregates) == false)) {
509+
if (Resolvables.resolved(groupings) == false || Resolvables.resolved(aggregates) == false) {
515510
ArrayList<Attribute> resolved = new ArrayList<>();
516511
for (Expression e : groupings) {
517512
Attribute attr = Expressions.attribute(e);
@@ -522,17 +517,29 @@ private Aggregate resolveAggregate(Aggregate aggregate, List<Attribute> children
522517
List<Attribute> resolvedList = NamedExpressions.mergeOutputAttributes(resolved, childrenOutput);
523518

524519
List<NamedExpression> newAggregates = new ArrayList<>();
525-
for (NamedExpression ag : aggregate.aggregates()) {
526-
var agg = (NamedExpression) ag.transformUp(UnresolvedAttribute.class, ua -> {
527-
Expression ne = ua;
528-
Attribute maybeResolved = maybeResolveAttribute(ua, resolvedList);
529-
if (maybeResolved != null) {
530-
changed.set(true);
531-
ne = maybeResolved;
532-
}
533-
return ne;
534-
});
535-
newAggregates.add(agg);
520+
// If the groupings are not resolved, skip the resolution of the references to groupings in the aggregates, resolve the
521+
// aggregations that do not reference to groupings, so that the fields/attributes referenced by the aggregations can be
522+
// resolved, and verifier doesn't report field/reference/column not found errors for them.
523+
boolean groupingResolved = Resolvables.resolved(groupings);
524+
int size = groupingResolved ? aggregates.size() : aggregates.size() - groupings.size();
525+
for (int i = 0; i < aggregates.size(); i++) {
526+
NamedExpression maybeResolvedAgg = aggregates.get(i);
527+
if (i < size) { // Skip resolving references to groupings in the aggregations if the groupings are not resolved yet.
528+
maybeResolvedAgg = (NamedExpression) maybeResolvedAgg.transformUp(UnresolvedAttribute.class, ua -> {
529+
Expression ne = ua;
530+
Attribute maybeResolved = maybeResolveAttribute(ua, resolvedList);
531+
// An item in aggregations can reference to groupings explicitly, if groupings are not resolved yet and
532+
// maybeResolved is not resolved, return the original UnresolvedAttribute, so that it has another chance
533+
// to get resolved in the next iteration.
534+
// For example STATS c = count(emp_no), x = d::int + 1 BY d = (date == "2025-01-01")
535+
if (groupingResolved || maybeResolved.resolved()) {
536+
changed.set(true);
537+
ne = maybeResolved;
538+
}
539+
return ne;
540+
});
541+
}
542+
newAggregates.add(maybeResolvedAgg);
536543
}
537544

538545
// TODO: remove this when Stats interface is removed

x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java

Lines changed: 151 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
import org.elasticsearch.xpack.esql.core.expression.Attribute;
2929
import org.elasticsearch.xpack.esql.core.expression.AttributeSet;
3030
import org.elasticsearch.xpack.esql.core.expression.EntryExpression;
31+
import org.elasticsearch.xpack.esql.core.expression.Expression;
3132
import org.elasticsearch.xpack.esql.core.expression.Expressions;
3233
import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
3334
import org.elasticsearch.xpack.esql.core.expression.Literal;
@@ -44,6 +45,10 @@
4445
import org.elasticsearch.xpack.esql.expression.function.fulltext.Match;
4546
import org.elasticsearch.xpack.esql.expression.function.fulltext.MatchOperator;
4647
import org.elasticsearch.xpack.esql.expression.function.fulltext.QueryString;
48+
import org.elasticsearch.xpack.esql.expression.function.grouping.Bucket;
49+
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToInteger;
50+
import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.Add;
51+
import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.Equals;
4752
import org.elasticsearch.xpack.esql.index.EsIndex;
4853
import org.elasticsearch.xpack.esql.index.IndexResolution;
4954
import org.elasticsearch.xpack.esql.parser.ParsingException;
@@ -66,6 +71,7 @@
6671

6772
import java.io.IOException;
6873
import java.io.InputStream;
74+
import java.time.Period;
6975
import java.util.ArrayList;
7076
import java.util.List;
7177
import java.util.Map;
@@ -92,6 +98,9 @@
9298
import static org.elasticsearch.xpack.esql.analysis.AnalyzerTestUtils.loadMapping;
9399
import static org.elasticsearch.xpack.esql.analysis.AnalyzerTestUtils.tsdbIndexResolution;
94100
import static org.elasticsearch.xpack.esql.core.tree.Source.EMPTY;
101+
import static org.elasticsearch.xpack.esql.core.type.DataType.DATETIME;
102+
import static org.elasticsearch.xpack.esql.core.type.DataType.DATE_PERIOD;
103+
import static org.elasticsearch.xpack.esql.type.EsqlDataTypeConverter.dateTimeToString;
95104
import static org.hamcrest.Matchers.contains;
96105
import static org.hamcrest.Matchers.containsString;
97106
import static org.hamcrest.Matchers.equalTo;
@@ -347,7 +356,7 @@ public void testNoProjection() {
347356
DataType.INTEGER,
348357
DataType.KEYWORD,
349358
DataType.TEXT,
350-
DataType.DATETIME,
359+
DATETIME,
351360
DataType.TEXT,
352361
DataType.KEYWORD,
353362
DataType.INTEGER,
@@ -2819,6 +2828,147 @@ public void testFunctionNamedParamsAsFunctionArgument1() {
28192828
assertEquals(DataType.DOUBLE, ee.dataType());
28202829
}
28212830

2831+
public void testResolveGroupingsBeforeResolvingImplicitReferencesToGroupings() {
2832+
var plan = analyze("""
2833+
FROM test
2834+
| EVAL date = "2025-01-01"::datetime
2835+
| STATS c = count(emp_no) BY d = (date == "2025-01-01")
2836+
""", "mapping-default.json");
2837+
2838+
var limit = as(plan, Limit.class);
2839+
var agg = as(limit.child(), Aggregate.class);
2840+
var aggregates = agg.aggregates();
2841+
assertThat(aggregates, hasSize(2));
2842+
Alias a = as(aggregates.get(0), Alias.class);
2843+
assertEquals("c", a.name());
2844+
Count c = as(a.child(), Count.class);
2845+
FieldAttribute fa = as(c.field(), FieldAttribute.class);
2846+
assertEquals("emp_no", fa.name());
2847+
ReferenceAttribute ra = as(aggregates.get(1), ReferenceAttribute.class); // reference in aggregates is resolved
2848+
assertEquals("d", ra.name());
2849+
List<Expression> groupings = agg.groupings();
2850+
assertEquals(1, groupings.size());
2851+
a = as(groupings.get(0), Alias.class); // reference in groupings is resolved
2852+
assertEquals("d", ra.name());
2853+
Equals equals = as(a.child(), Equals.class);
2854+
ra = as(equals.left(), ReferenceAttribute.class);
2855+
assertEquals("date", ra.name());
2856+
Literal literal = as(equals.right(), Literal.class);
2857+
assertEquals("2025-01-01T00:00:00.000Z", dateTimeToString(Long.parseLong(literal.value().toString())));
2858+
assertEquals(DATETIME, literal.dataType());
2859+
}
2860+
2861+
public void testResolveGroupingsBeforeResolvingExplicitReferencesToGroupings() {
2862+
var plan = analyze("""
2863+
FROM test
2864+
| EVAL date = "2025-01-01"::datetime
2865+
| STATS c = count(emp_no), x = d::int + 1 BY d = (date == "2025-01-01")
2866+
""", "mapping-default.json");
2867+
2868+
var limit = as(plan, Limit.class);
2869+
var agg = as(limit.child(), Aggregate.class);
2870+
var aggregates = agg.aggregates();
2871+
assertThat(aggregates, hasSize(3));
2872+
Alias a = as(aggregates.get(0), Alias.class);
2873+
assertEquals("c", a.name());
2874+
Count c = as(a.child(), Count.class);
2875+
FieldAttribute fa = as(c.field(), FieldAttribute.class);
2876+
assertEquals("emp_no", fa.name());
2877+
a = as(aggregates.get(1), Alias.class); // explicit reference to groupings is resolved
2878+
assertEquals("x", a.name());
2879+
Add add = as(a.child(), Add.class);
2880+
ToInteger toInteger = as(add.left(), ToInteger.class);
2881+
ReferenceAttribute ra = as(toInteger.field(), ReferenceAttribute.class);
2882+
assertEquals("d", ra.name());
2883+
ra = as(aggregates.get(2), ReferenceAttribute.class); // reference in aggregates is resolved
2884+
assertEquals("d", ra.name());
2885+
List<Expression> groupings = agg.groupings();
2886+
assertEquals(1, groupings.size());
2887+
a = as(groupings.get(0), Alias.class); // reference in groupings is resolved
2888+
assertEquals("d", ra.name());
2889+
Equals equals = as(a.child(), Equals.class);
2890+
ra = as(equals.left(), ReferenceAttribute.class);
2891+
assertEquals("date", ra.name());
2892+
Literal literal = as(equals.right(), Literal.class);
2893+
assertEquals("2025-01-01T00:00:00.000Z", dateTimeToString(Long.parseLong(literal.value().toString())));
2894+
assertEquals(DATETIME, literal.dataType());
2895+
}
2896+
2897+
public void testBucketWithIntervalInStringInBothAggregationAndGrouping() {
2898+
var plan = analyze("""
2899+
FROM test
2900+
| STATS c = count(emp_no), b = BUCKET(hire_date, "1 year") + 1 year BY yr = BUCKET(hire_date, "1 year")
2901+
""", "mapping-default.json");
2902+
2903+
var limit = as(plan, Limit.class);
2904+
var agg = as(limit.child(), Aggregate.class);
2905+
var aggregates = agg.aggregates();
2906+
assertThat(aggregates, hasSize(3));
2907+
Alias a = as(aggregates.get(0), Alias.class);
2908+
assertEquals("c", a.name());
2909+
Count c = as(a.child(), Count.class);
2910+
FieldAttribute fa = as(c.field(), FieldAttribute.class);
2911+
assertEquals("emp_no", fa.name());
2912+
a = as(aggregates.get(1), Alias.class); // explicit reference to groupings is resolved
2913+
assertEquals("b", a.name());
2914+
Add add = as(a.child(), Add.class);
2915+
Bucket bucket = as(add.left(), Bucket.class);
2916+
fa = as(bucket.field(), FieldAttribute.class);
2917+
assertEquals("hire_date", fa.name());
2918+
Literal literal = as(bucket.buckets(), Literal.class);
2919+
Literal oneYear = new Literal(EMPTY, Period.ofYears(1), DATE_PERIOD);
2920+
assertEquals(oneYear, literal);
2921+
literal = as(add.right(), Literal.class);
2922+
assertEquals(oneYear, literal);
2923+
ReferenceAttribute ra = as(aggregates.get(2), ReferenceAttribute.class); // reference in aggregates is resolved
2924+
assertEquals("yr", ra.name());
2925+
List<Expression> groupings = agg.groupings();
2926+
assertEquals(1, groupings.size());
2927+
a = as(groupings.get(0), Alias.class); // reference in groupings is resolved
2928+
assertEquals("yr", ra.name());
2929+
bucket = as(a.child(), Bucket.class);
2930+
fa = as(bucket.field(), FieldAttribute.class);
2931+
assertEquals("hire_date", fa.name());
2932+
literal = as(bucket.buckets(), Literal.class);
2933+
assertEquals(oneYear, literal);
2934+
}
2935+
2936+
public void testBucketWithIntervalInStringInGroupingReferencedInAggregation() {
2937+
var plan = analyze("""
2938+
FROM test
2939+
| STATS c = count(emp_no), b = yr + 1 year BY yr = BUCKET(hire_date, "1 year")
2940+
""", "mapping-default.json");
2941+
2942+
var limit = as(plan, Limit.class);
2943+
var agg = as(limit.child(), Aggregate.class);
2944+
var aggregates = agg.aggregates();
2945+
assertThat(aggregates, hasSize(3));
2946+
Alias a = as(aggregates.get(0), Alias.class);
2947+
assertEquals("c", a.name());
2948+
Count c = as(a.child(), Count.class);
2949+
FieldAttribute fa = as(c.field(), FieldAttribute.class);
2950+
assertEquals("emp_no", fa.name());
2951+
a = as(aggregates.get(1), Alias.class); // explicit reference to groupings is resolved
2952+
assertEquals("b", a.name());
2953+
Add add = as(a.child(), Add.class);
2954+
ReferenceAttribute ra = as(add.left(), ReferenceAttribute.class);
2955+
assertEquals("yr", ra.name());
2956+
Literal oneYear = new Literal(EMPTY, Period.ofYears(1), DATE_PERIOD);
2957+
Literal literal = as(add.right(), Literal.class);
2958+
assertEquals(oneYear, literal);
2959+
ra = as(aggregates.get(2), ReferenceAttribute.class); // reference in aggregates is resolved
2960+
assertEquals("yr", ra.name());
2961+
List<Expression> groupings = agg.groupings();
2962+
assertEquals(1, groupings.size());
2963+
a = as(groupings.get(0), Alias.class); // reference in groupings is resolved
2964+
assertEquals("yr", ra.name());
2965+
Bucket bucket = as(a.child(), Bucket.class);
2966+
fa = as(bucket.field(), FieldAttribute.class);
2967+
assertEquals("hire_date", fa.name());
2968+
literal = as(bucket.buckets(), Literal.class);
2969+
assertEquals(oneYear, literal);
2970+
}
2971+
28222972
private void verifyUnsupported(String query, String errorMessage) {
28232973
verifyUnsupported(query, errorMessage, "mapping-multi-field-variation.json");
28242974
}

0 commit comments

Comments
 (0)