Skip to content

Commit 98bc563

Browse files
committed
Replace grouping by DateFormat With DateTrunc
1 parent 7ec8fcc commit 98bc563

File tree

2 files changed

+118
-0
lines changed

2 files changed

+118
-0
lines changed

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
import org.elasticsearch.xpack.esql.optimizer.rules.logical.ReplaceAggregateAggExpressionWithEval;
4949
import org.elasticsearch.xpack.esql.optimizer.rules.logical.ReplaceAggregateNestedExpressionWithEval;
5050
import org.elasticsearch.xpack.esql.optimizer.rules.logical.ReplaceAliasingEvalWithProject;
51+
import org.elasticsearch.xpack.esql.optimizer.rules.logical.ReplaceGroupingByDateFormatWithDateTrunc;
5152
import org.elasticsearch.xpack.esql.optimizer.rules.logical.ReplaceLimitAndSortAsTopN;
5253
import org.elasticsearch.xpack.esql.optimizer.rules.logical.ReplaceOrderByExpressionWithEval;
5354
import org.elasticsearch.xpack.esql.optimizer.rules.logical.ReplaceRegexMatch;
@@ -137,6 +138,7 @@ protected static Batch<LogicalPlan> substitutions() {
137138
// Needs to occur before ReplaceAggregateAggExpressionWithEval, which will update the functions, losing the filter.
138139
new SubstituteFilteredExpression(),
139140
new RemoveStatsOverride(),
141+
new ReplaceGroupingByDateFormatWithDateTrunc(),
140142
// first extract nested expressions inside aggs
141143
new ReplaceAggregateNestedExpressionWithEval(),
142144
// then extract nested aggs top-level
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0; you may not use this file except in compliance with the Elastic License
5+
* 2.0.
6+
*/
7+
8+
package org.elasticsearch.xpack.esql.optimizer.rules.logical;
9+
10+
import org.elasticsearch.xpack.esql.core.expression.Alias;
11+
import org.elasticsearch.xpack.esql.core.expression.Expression;
12+
import org.elasticsearch.xpack.esql.core.expression.Literal;
13+
import org.elasticsearch.xpack.esql.core.tree.Source;
14+
import org.elasticsearch.xpack.esql.core.type.DataType;
15+
import org.elasticsearch.xpack.esql.expression.function.scalar.date.DateFormat;
16+
import org.elasticsearch.xpack.esql.expression.function.scalar.date.DateTrunc;
17+
import org.elasticsearch.xpack.esql.plan.logical.Aggregate;
18+
import org.elasticsearch.xpack.esql.plan.logical.Eval;
19+
import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan;
20+
21+
import java.time.Period;
22+
import java.time.format.DateTimeFormatter;
23+
import java.time.temporal.ChronoField;
24+
import java.time.temporal.ChronoUnit;
25+
import java.time.temporal.IsoFields;
26+
import java.util.ArrayList;
27+
import java.util.List;
28+
29+
/**
30+
* An optimizer rule that replaces DATE_FORMAT function calls in GROUP BY clauses with more efficient DATE_TRUNC operations.
31+
* This optimization is possible when the date format pattern can be mapped to a specific time interval.
32+
* <p>
33+
* For example,
34+
* {@code STATS my_sum = SUM(value) BY month = DATE_FORMAT("yyyy-MM", timestamp) }
35+
* can be optimized to
36+
* {@code STATS my_sum = SUM(value) BY month= DATE_TRUNC(1 month, timestamp) | EVAL month = DATE_FORMAT("yyyy-MM", month) }
37+
* which is more efficient for grouping operations.
38+
* <p>
39+
* The rule analyzes the format pattern and maps it to the smallest possible time interval that preserves the grouping semantics.
40+
* Supported intervals range from nanoseconds to years, including special cases like quarters and weeks.
41+
* <p>
42+
* This optimization not only improves performance but also ensures correctness in time-based grouping:
43+
* DATE_TRUNC properly handles timezone and daylight saving time (DST) transitions when using Period or Duration intervals,
44+
* while DATE_FORMAT does not account for these timezone-related considerations.
45+
*/
46+
public class ReplaceGroupingByDateFormatWithDateTrunc extends OptimizerRules.OptimizerRule<Aggregate> {
47+
48+
@Override
49+
protected LogicalPlan rule(Aggregate aggregate) {
50+
51+
List<Alias> evals = new ArrayList<>();
52+
List<Expression> newGroupings = new ArrayList<>(aggregate.groupings());
53+
54+
// extract `DateFormat` in groupings
55+
for (int i = 0, s = newGroupings.size(); i < s; i++) {
56+
Expression g = newGroupings.get(i);
57+
if (g instanceof Alias as && as.child() instanceof DateFormat df) {
58+
Literal format = (Literal) df.children().getFirst();
59+
Expression field = df.children().get(1);
60+
61+
Literal interval = formatToMinimalInterval((String) format.value(), g.source());
62+
// if the format can be optimized to `DATE_TRUNC`
63+
if (interval != null) {
64+
DateTrunc dateTrunc = new DateTrunc(df.source(), interval, field);
65+
Alias alias = as.replaceChild(dateTrunc);
66+
newGroupings.set(i, alias);
67+
68+
Expression expression = df.replaceChildren(List.of(format, alias.toAttribute()));
69+
evals.add(new Alias(as.source(), as.name(), expression));
70+
}
71+
}
72+
}
73+
74+
if (evals.isEmpty() == false) {
75+
aggregate = aggregate.with(aggregate.child(), newGroupings, aggregate.aggregates());
76+
return new Eval(aggregate.source(), aggregate, evals);
77+
}
78+
return aggregate;
79+
}
80+
81+
private static Literal formatToMinimalInterval(String format, Source source) {
82+
try {
83+
DateTimeFormatter formatter = DateTimeFormatter.ofPattern(format);
84+
String formatterAsString = formatter.toString();
85+
if (formatterAsString.contains(ChronoField.NANO_OF_SECOND.toString())
86+
|| formatterAsString.contains(ChronoField.NANO_OF_DAY.toString())) {
87+
return new Literal(source, ChronoUnit.NANOS.getDuration(), DataType.TIME_DURATION);
88+
} else if (formatterAsString.contains(ChronoField.MILLI_OF_DAY.toString())) {
89+
return new Literal(source, ChronoUnit.MILLIS.getDuration(), DataType.TIME_DURATION);
90+
} else if (formatterAsString.contains(ChronoField.SECOND_OF_MINUTE.toString())) {
91+
return new Literal(source, ChronoUnit.SECONDS.getDuration(), DataType.TIME_DURATION);
92+
} else if (formatterAsString.contains(ChronoField.MINUTE_OF_HOUR.toString())) {
93+
return new Literal(source, ChronoUnit.MINUTES.getDuration(), DataType.TIME_DURATION);
94+
} else if (formatterAsString.contains(ChronoField.HOUR_OF_DAY.toString())
95+
|| formatterAsString.contains(ChronoField.CLOCK_HOUR_OF_DAY.toString())
96+
|| formatterAsString.contains(ChronoField.HOUR_OF_AMPM.toString())
97+
|| formatterAsString.contains(ChronoField.CLOCK_HOUR_OF_AMPM.toString())) {
98+
return new Literal(source, ChronoUnit.HOURS.getDuration(), DataType.TIME_DURATION);
99+
} else if (formatterAsString.contains(ChronoField.AMPM_OF_DAY.toString())) {
100+
return new Literal(source, ChronoUnit.HALF_DAYS, DataType.TIME_DURATION);
101+
} else if (formatterAsString.contains(ChronoField.DAY_OF_WEEK.toString())) {
102+
return new Literal(source, Period.ofDays(1), DataType.DATE_PERIOD);
103+
} else if (formatterAsString.contains(ChronoField.ALIGNED_WEEK_OF_MONTH.toString())) {
104+
return new Literal(source, Period.ofDays(7), DataType.DATE_PERIOD);
105+
} else if (formatterAsString.contains(ChronoField.MONTH_OF_YEAR.toString())) {
106+
return new Literal(source, Period.ofMonths(1), DataType.DATE_PERIOD);
107+
} else if (formatterAsString.contains(IsoFields.QUARTER_OF_YEAR.toString())) {
108+
return new Literal(source, Period.ofMonths(3), DataType.DATE_PERIOD);
109+
} else if (formatterAsString.contains(ChronoField.YEAR_OF_ERA.toString())
110+
|| formatterAsString.contains(ChronoField.YEAR.toString())) {
111+
return new Literal(source, Period.ofYears(1), DataType.DATE_PERIOD);
112+
}
113+
} catch (IllegalArgumentException ignored) {}
114+
return null;
115+
}
116+
}

0 commit comments

Comments
 (0)