Skip to content

Commit d655dde

Browse files
committed
Avoid incorrect grouping with multiple DATE_FORMAT
1 parent ac098d4 commit d655dde

File tree

2 files changed

+74
-24
lines changed

2 files changed

+74
-24
lines changed

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceAggregateNestedExpressionWithEval.java

Lines changed: 30 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -72,12 +72,20 @@ protected LogicalPlan rule(Aggregate aggregate) {
7272
Map<GroupingFunction, Attribute> groupingAttributes = new HashMap<>();
7373
List<Expression> newGroupings = new ArrayList<>(aggregate.groupings());
7474
List<NamedExpression> newProjections = new ArrayList<>();
75-
75+
Map<NamedExpression, Attribute> referenceAttributes = new HashMap<>();
7676
boolean groupingChanged = false;
7777

7878
List<Alias> newEvals = new ArrayList<>();
7979
int[] counter = new int[] { 0 };
8080

81+
// Count DateFormat occurrences to avoid incorrect grouping when replacing multiple DATE_FORMAT with DATE_TRUNC
82+
int[] dateFormatCount = new int[] { 0 };
83+
for (Expression g : newGroupings) {
84+
if (g instanceof Alias as && as.child() instanceof DateFormat) {
85+
dateFormatCount[0]++;
86+
}
87+
}
88+
8189
// start with the groupings since the aggs might reuse/reference them
8290
for (int i = 0, s = newGroupings.size(); i < s; i++) {
8391
Expression g = newGroupings.get(i);
@@ -95,7 +103,7 @@ protected LogicalPlan rule(Aggregate aggregate) {
95103
// Move the alias into an eval and replace it with its attribute.
96104
groupingChanged = true;
97105
var attr = as.toAttribute();
98-
if (asChild instanceof DateFormat df) {
106+
if (asChild instanceof DateFormat df && dateFormatCount[0] == 1) {
99107
// Extract the format pattern and field from DateFormat
100108
Literal format = (Literal) df.children().getFirst();
101109
Expression field = df.children().get(1);
@@ -115,7 +123,7 @@ protected LogicalPlan rule(Aggregate aggregate) {
115123
// Create a new eval alias for the optimized expression
116124
Alias newEval = as.replaceChild(expression);
117125
newEvals.add(newEval);
118-
newProjections.add(newEval.toAttribute());
126+
referenceAttributes.put(attr, newEval.toAttribute());
119127
evalNames.put(as.name(), attr);
120128
as = alias;
121129
}
@@ -178,11 +186,12 @@ protected LogicalPlan rule(Aggregate aggregate) {
178186
if (ref != null) {
179187
aggsChanged.set(true);
180188
newAggs.add(ref);
189+
newProjections.add(referenceAttributes.getOrDefault(ref, ref.toAttribute()));
190+
continue;
181191
}
182-
} else {
183-
newAggs.add(a);
184-
newProjections.add(a.toAttribute());
185192
}
193+
newAggs.add(a);
194+
newProjections.add(a.toAttribute());
186195
}
187196

188197
if (evals.size() > 0) {
@@ -285,18 +294,21 @@ private static Literal formatToMinimalInterval(String format, Source source) {
285294
return new Literal(source, ChronoUnit.HOURS.getDuration(), DataType.TIME_DURATION);
286295
} else if (formatterAsString.contains(ChronoField.AMPM_OF_DAY.toString())) {
287296
return new Literal(source, ChronoUnit.HALF_DAYS, DataType.TIME_DURATION);
288-
} else if (formatterAsString.contains(ChronoField.DAY_OF_WEEK.toString())) {
289-
return new Literal(source, Period.ofDays(1), DataType.DATE_PERIOD);
290-
} else if (formatterAsString.contains(ChronoField.ALIGNED_WEEK_OF_MONTH.toString())) {
291-
return new Literal(source, Period.ofDays(7), DataType.DATE_PERIOD);
292-
} else if (formatterAsString.contains(ChronoField.MONTH_OF_YEAR.toString())) {
293-
return new Literal(source, Period.ofMonths(1), DataType.DATE_PERIOD);
294-
} else if (formatterAsString.contains(IsoFields.QUARTER_OF_YEAR.toString())) {
295-
return new Literal(source, Period.ofMonths(3), DataType.DATE_PERIOD);
296-
} else if (formatterAsString.contains(ChronoField.YEAR_OF_ERA.toString())
297-
|| formatterAsString.contains(ChronoField.YEAR.toString())) {
298-
return new Literal(source, Period.ofYears(1), DataType.DATE_PERIOD);
299-
}
297+
} else if (formatterAsString.contains(ChronoField.DAY_OF_WEEK.toString())
298+
|| formatterAsString.contains(ChronoField.DAY_OF_MONTH.toString())
299+
|| formatterAsString.contains(ChronoField.DAY_OF_YEAR.toString())) {
300+
return new Literal(source, Period.ofDays(1), DataType.DATE_PERIOD);
301+
} else if (formatterAsString.contains(ChronoField.ALIGNED_WEEK_OF_MONTH.toString())
302+
|| formatterAsString.contains(ChronoField.ALIGNED_WEEK_OF_YEAR.toString())) {
303+
return new Literal(source, Period.ofDays(7), DataType.DATE_PERIOD);
304+
} else if (formatterAsString.contains(ChronoField.MONTH_OF_YEAR.toString())) {
305+
return new Literal(source, Period.ofMonths(1), DataType.DATE_PERIOD);
306+
} else if (formatterAsString.contains(IsoFields.QUARTER_OF_YEAR.toString())) {
307+
return new Literal(source, Period.ofMonths(3), DataType.DATE_PERIOD);
308+
} else if (formatterAsString.contains(ChronoField.YEAR_OF_ERA.toString())
309+
|| formatterAsString.contains(ChronoField.YEAR.toString())) {
310+
return new Literal(source, Period.ofYears(1), DataType.DATE_PERIOD);
311+
}
300312
} catch (IllegalArgumentException ignored) {}
301313
return null;
302314
}

x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java

Lines changed: 44 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7999,12 +7999,12 @@ public void testSampleNoPushDownChangePoint() {
79997999
}
80008000

80018001
/**
8002-
* Project[[avg{r}#7, month{r}#20]]
8003-
* \_Eval[[$$SUM$avg$0{r$}#21 / $$COUNT$avg$1{r$}#22 AS avg#7, DATEFORMAT([79 79 79 79 2d 4d 4d][KEYWORD],month{r}#4) AS month#20]]
8002+
* Project[[avg{r}#7, date{r}#4]]
8003+
* \_Eval[[$$SUM$avg$0{r$}#20 / $$COUNT$avg$1{r$}#21 AS avg#7]]
80048004
* \_Limit[1000[INTEGER],false]
8005-
* \_Aggregate[[month{r}#4],[SUM(salary{f}#14,true[BOOLEAN]) AS $$SUM$avg$0#21, COUNT(salary{f}#14,true[BOOLEAN]) AS
8006-
* $$COUNT$avg$1#22, month{r}#4]]
8007-
* \_Eval[[DATETRUNC(P1M[DATE_PERIOD],hire_date{f}#16) AS month#4]]
8005+
* \_Aggregate[[date{r}#4],[SUM(salary{f}#14,true[BOOLEAN]) AS $$SUM$avg$0#20, COUNT(salary{f}#14,true[BOOLEAN]) AS $$COUNT$av
8006+
* g$1#21, date{r}#4]]
8007+
* \_Eval[[DATEFORMAT([79 79 79 79][KEYWORD],hire_date{f}#16) AS date#4]]
80088008
* \_EsRelation[test][_meta_field{f}#15, emp_no{f}#9, first_name{f}#10, g..]
80098009
*/
80108010
public void testReplaceGroupingByDateFormatWithDateTrunc() {
@@ -8016,7 +8016,6 @@ public void testReplaceGroupingByDateFormatWithDateTrunc() {
80168016
"yy-mm",
80178017
"yyyy-dd-MM",
80188018
"DD",
8019-
"yyyy-MM-dd HH:mm:ss",
80208019
"yyyy-MM-dd HH:mm:ss"
80218020
);
80228021

@@ -8047,4 +8046,43 @@ public void testReplaceGroupingByDateFormatWithDateTrunc() {
80478046

80488047
}
80498048

8049+
/**
8050+
* Project[[avg{r}#10, date{r}#4, date2{r}#7]]
8051+
* \_Eval[[$$SUM$avg$0{r$}#24 / $$COUNT$avg$1{r$}#25 AS avg#10]]
8052+
* \_Limit[1000[INTEGER],false]
8053+
* \_Aggregate[[date{r}#4, date2{r}#7],[SUM(salary{f}#18,true[BOOLEAN]) AS $$SUM$avg$0#24, COUNT(salary{f}#18,true[BOOLEAN]) A
8054+
* S $$COUNT$avg$1#25, date{r}#4, date2{r}#7]]
8055+
* \_Eval[[DATEFORMAT([79 79 79 79 2d 64 64][KEYWORD],hire_date{f}#20) AS date#4, DATEFORMAT([64 64][KEYWORD],hire_date{
8056+
* f}#20) AS date2#7]]
8057+
* \_EsRelation[test][_meta_field{f}#19, emp_no{f}#13, first_name{f}#14, ..]
8058+
*/
8059+
public void testReplaceGroupingByDateFormatWithDateTrunc2() {
8060+
var query = """
8061+
FROM test
8062+
| STATS avg = AVG(salary) BY date = DATE_FORMAT("yyyy-dd", hire_date), date2 = DATE_FORMAT("dd", hire_date)
8063+
""";
8064+
var optimized = optimizedPlan(query);
8065+
8066+
var project = as(optimized, Project.class);
8067+
var eval = as(project.child(), Eval.class);
8068+
assertThat(eval.fields(), hasSize(1));
8069+
// var dateformat = as(eval.fields().get(1).child(), DateFormat.class);
8070+
8071+
var limit = as(eval.child(), Limit.class);
8072+
var agg = as(limit.child(), Aggregate.class);
8073+
assertThat(agg.groupings(), hasSize(2));
8074+
var grouping1 = as(agg.groupings().getFirst(), ReferenceAttribute.class);
8075+
var grouping2 = as(agg.groupings().get(1), ReferenceAttribute.class);
8076+
8077+
var eval2 = as(agg.child(), Eval.class);
8078+
assertThat(eval2.fields(), hasSize(2));
8079+
var dateFormat1 = as(eval2.fields().getFirst().child(), DateFormat.class);
8080+
var dateFormat2 = as(eval2.fields().getFirst().child(), DateFormat.class);
8081+
8082+
assertThat(eval2.fields().getFirst().toAttribute(), is(grouping1));
8083+
assertThat(eval2.fields().get(1).toAttribute(), is(grouping2));
8084+
8085+
var source = as(eval2.child(), EsRelation.class);
8086+
}
8087+
80508088
}

0 commit comments

Comments
 (0)