elastic
diff --git a/‎x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java‎
Lines changed: 0 additions & 2 deletions b/‎x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java‎
Lines changed: 0 additions & 2 deletions
diff --git a/‎x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceAggregateNestedExpressionWithEval.java‎
Lines changed: 112 additions & 4 deletions b/‎x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceAggregateNestedExpressionWithEval.java‎
Lines changed: 112 additions & 4 deletions
diff --git a/‎x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceGroupingByDateFormatWithDateTrunc.java‎
Lines changed: 0 additions & 116 deletions b/‎x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceGroupingByDateFormatWithDateTrunc.java‎
Lines changed: 0 additions & 116 deletions
@@ -48,7 +48,6 @@
 import org.elasticsearch.xpack.esql.optimizer.rules.logical.ReplaceAggregateAggExpressionWithEval;
 import org.elasticsearch.xpack.esql.optimizer.rules.logical.ReplaceAggregateNestedExpressionWithEval;
 import org.elasticsearch.xpack.esql.optimizer.rules.logical.ReplaceAliasingEvalWithProject;
-import org.elasticsearch.xpack.esql.optimizer.rules.logical.ReplaceGroupingByDateFormatWithDateTrunc;
 import org.elasticsearch.xpack.esql.optimizer.rules.logical.ReplaceLimitAndSortAsTopN;
 import org.elasticsearch.xpack.esql.optimizer.rules.logical.ReplaceOrderByExpressionWithEval;
 import org.elasticsearch.xpack.esql.optimizer.rules.logical.ReplaceRegexMatch;
@@ -138,7 +137,6 @@ protected static Batch<LogicalPlan> substitutions() {
             // Needs to occur before ReplaceAggregateAggExpressionWithEval, which will update the functions, losing the filter.
             new SubstituteFilteredExpression(),
             new RemoveStatsOverride(),
-            new ReplaceGroupingByDateFormatWithDateTrunc(),
             // first extract nested expressions inside aggs
             new ReplaceAggregateNestedExpressionWithEval(),
             // then extract nested aggs top-level
 
@@ -10,28 +10,58 @@
 import org.elasticsearch.xpack.esql.core.expression.Alias;
 import org.elasticsearch.xpack.esql.core.expression.Attribute;
 import org.elasticsearch.xpack.esql.core.expression.Expression;
+import org.elasticsearch.xpack.esql.core.expression.Literal;
 import org.elasticsearch.xpack.esql.core.expression.NamedExpression;
+import org.elasticsearch.xpack.esql.core.expression.ReferenceAttribute;
+import org.elasticsearch.xpack.esql.core.tree.Source;
+import org.elasticsearch.xpack.esql.core.type.DataType;
 import org.elasticsearch.xpack.esql.core.util.Holder;
 import org.elasticsearch.xpack.esql.expression.function.aggregate.AggregateFunction;
 import org.elasticsearch.xpack.esql.expression.function.grouping.GroupingFunction;
+import org.elasticsearch.xpack.esql.expression.function.scalar.date.DateFormat;
+import org.elasticsearch.xpack.esql.expression.function.scalar.date.DateTrunc;
 import org.elasticsearch.xpack.esql.plan.logical.Aggregate;
 import org.elasticsearch.xpack.esql.plan.logical.Eval;
 import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan;
+import org.elasticsearch.xpack.esql.plan.logical.Project;
 
+import java.time.Period;
+import java.time.format.DateTimeFormatter;
+import java.time.temporal.ChronoField;
+import java.time.temporal.ChronoUnit;
+import java.time.temporal.IsoFields;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 
 /**
- * Replace nested expressions inside a {@link Aggregate} with synthetic eval.
+ * An optimizer rule that performs two main optimizations:
+ * 1. Replaces nested expressions inside a {@link Aggregate} with synthetic eval
+ * 2. Optimizes DATE_FORMAT function calls in GROUP BY clauses with more efficient DATE_TRUNC operations
+ * <p>
+ * For nested expressions in aggregates:
  * {@code STATS SUM(a + 1) BY x % 2}
  * becomes
  * {@code EVAL `a + 1` = a + 1, `x % 2` = x % 2 | STATS SUM(`a+1`_ref) BY `x % 2`_ref}
  * and
  * {@code INLINESTATS SUM(a + 1) BY x % 2}
  * becomes
  * {@code EVAL `a + 1` = a + 1, `x % 2` = x % 2 | INLINESTATS SUM(`a+1`_ref) BY `x % 2`_ref}
+ * <p>
+ * For date formatting optimization:
+ * {@code STATS sum = SUM(value) BY month = DATE_FORMAT("yyyy-MM", timestamp) }
+ * can be optimized to
+ * {@code STATS sum = SUM(value) BY month1 = DATE_TRUNC(1 month, timestamp) | EVAL month = DATE_FORMAT("yyyy-MM", month1) | KEEP sum, month}
+ * which is more efficient for grouping operations.
+ * <p>
+ * The date formatting optimization analyzes the format pattern and maps it to the smallest possible time interval
+ * that preserves the grouping semantics. Supported intervals range from nanoseconds to years, including special
+ * cases like quarters and weeks.
+ * <p>
+ * This date optimization not only improves performance but also ensures correctness in time-based grouping:
+ * DATE_TRUNC properly handles timezone and daylight saving time (DST) transitions when using Period or Duration
+ * intervals, while DATE_FORMAT does not account for these timezone-related considerations.
  */
 public final class ReplaceAggregateNestedExpressionWithEval extends OptimizerRules.OptimizerRule<Aggregate> {
 
@@ -41,8 +71,13 @@ protected LogicalPlan rule(Aggregate aggregate) {
         Map<String, Attribute> evalNames = new HashMap<>();
         Map<GroupingFunction, Attribute> groupingAttributes = new HashMap<>();
         List<Expression> newGroupings = new ArrayList<>(aggregate.groupings());
+        List<NamedExpression> newProjections = new ArrayList<>();
+
         boolean groupingChanged = false;
 
+        List<Alias> newEvals = new ArrayList<>();
+        int[] counter = new int[] { 0 };
+
         // start with the groupings since the aggs might reuse/reference them
         for (int i = 0, s = newGroupings.size(); i < s; i++) {
             Expression g = newGroupings.get(i);
@@ -60,6 +95,32 @@ protected LogicalPlan rule(Aggregate aggregate) {
                     // Move the alias into an eval and replace it with its attribute.
                     groupingChanged = true;
                     var attr = as.toAttribute();
+                    if (asChild instanceof DateFormat df) {
+                        // Extract the format pattern and field from DateFormat
+                        Literal format = (Literal) df.children().getFirst();
+                        Expression field = df.children().get(1);
+
+                        // Try to convert the format pattern to a minimal time interval
+                        // This optimization attempts to simplify date formatting to DATE_TRUNC operations
+                        Literal interval = formatToMinimalInterval((String) format.value(), g.source());
+                        // If we can optimize the format to use DATE_TRUNC
+                        if (interval != null) {
+                            // Create a new DateTrunc operation with the optimized interval
+                            DateTrunc dateTrunc = new DateTrunc(df.source(), interval, field);
+                            // Create a synthetic alias for the DateTrunc operation
+                            var alias = new Alias(as.source(), syntheticName(dateTrunc, as, counter[0]++), dateTrunc, null, true);
+                            attr = alias.toAttribute();
+                            // Replace the original DateFormat children with the new format and attribute
+                            Expression expression = df.replaceChildren(List.of(format, attr));
+                            // Create a new eval alias for the optimized expression
+                            Alias newEval = as.replaceChild(expression);
+                            newEvals.add(newEval);
+                            newProjections.add(newEval.toAttribute());
+                            evalNames.put(as.name(), attr);
+                            as = alias;
+                        }
+                    }
+
                     evals.add(as);
                     evalNames.put(as.name(), attr);
                     newGroupings.set(i, attr);
@@ -80,7 +141,6 @@ protected LogicalPlan rule(Aggregate aggregate) {
             expToAttribute.put(a.child().canonical(), a.toAttribute());
         }
 
-        int[] counter = new int[] { 0 };
         // for the aggs make sure to unwrap the agg function and check the existing groupings
         for (NamedExpression agg : aggs) {
             NamedExpression a = (NamedExpression) agg.transformDown(Alias.class, as -> {
@@ -113,8 +173,16 @@ protected LogicalPlan rule(Aggregate aggregate) {
 
                 return as.replaceChild(replaced);
             });
-
-            newAggs.add(a);
+            if (groupingChanged && agg instanceof ReferenceAttribute ra) {
+                Attribute ref = evalNames.get(ra.name());
+                if (ref != null) {
+                    aggsChanged.set(true);
+                    newAggs.add(ref);
+                }
+            } else {
+                newAggs.add(a);
+                newProjections.add(a.toAttribute());
+            }
         }
 
         if (evals.size() > 0) {
@@ -124,6 +192,10 @@ protected LogicalPlan rule(Aggregate aggregate) {
             var newEval = new Eval(aggregate.source(), aggregate.child(), evals);
             aggregate = aggregate.with(newEval, groupings, aggregates);
         }
+        if (newEvals.size() > 0) {
+            Eval eval = new Eval(aggregate.source(), aggregate, newEvals);
+            return new Project(aggregate.source(), eval, newProjections);
+        }
 
         return aggregate;
     }
@@ -192,4 +264,40 @@ private static Expression transformAggregateFunction(
     private static String syntheticName(Expression expression, Expression func, int counter) {
         return TemporaryNameUtils.temporaryName(expression, func, counter);
     }
+
+    private static Literal formatToMinimalInterval(String format, Source source) {
+        try {
+            DateTimeFormatter formatter = DateTimeFormatter.ofPattern(format);
+            String formatterAsString = formatter.toString();
+            if (formatterAsString.contains(ChronoField.NANO_OF_SECOND.toString())
+                || formatterAsString.contains(ChronoField.NANO_OF_DAY.toString())) {
+                return new Literal(source, ChronoUnit.NANOS.getDuration(), DataType.TIME_DURATION);
+            } else if (formatterAsString.contains(ChronoField.MILLI_OF_DAY.toString())) {
+                return new Literal(source, ChronoUnit.MILLIS.getDuration(), DataType.TIME_DURATION);
+            } else if (formatterAsString.contains(ChronoField.SECOND_OF_MINUTE.toString())) {
+                return new Literal(source, ChronoUnit.SECONDS.getDuration(), DataType.TIME_DURATION);
+            } else if (formatterAsString.contains(ChronoField.MINUTE_OF_HOUR.toString())) {
+                return new Literal(source, ChronoUnit.MINUTES.getDuration(), DataType.TIME_DURATION);
+            } else if (formatterAsString.contains(ChronoField.HOUR_OF_DAY.toString())
+                || formatterAsString.contains(ChronoField.CLOCK_HOUR_OF_DAY.toString())
+                || formatterAsString.contains(ChronoField.HOUR_OF_AMPM.toString())
+                || formatterAsString.contains(ChronoField.CLOCK_HOUR_OF_AMPM.toString())) {
+                    return new Literal(source, ChronoUnit.HOURS.getDuration(), DataType.TIME_DURATION);
+                } else if (formatterAsString.contains(ChronoField.AMPM_OF_DAY.toString())) {
+                    return new Literal(source, ChronoUnit.HALF_DAYS, DataType.TIME_DURATION);
+                } else if (formatterAsString.contains(ChronoField.DAY_OF_WEEK.toString())) {
+                    return new Literal(source, Period.ofDays(1), DataType.DATE_PERIOD);
+                } else if (formatterAsString.contains(ChronoField.ALIGNED_WEEK_OF_MONTH.toString())) {
+                    return new Literal(source, Period.ofDays(7), DataType.DATE_PERIOD);
+                } else if (formatterAsString.contains(ChronoField.MONTH_OF_YEAR.toString())) {
+                    return new Literal(source, Period.ofMonths(1), DataType.DATE_PERIOD);
+                } else if (formatterAsString.contains(IsoFields.QUARTER_OF_YEAR.toString())) {
+                    return new Literal(source, Period.ofMonths(3), DataType.DATE_PERIOD);
+                } else if (formatterAsString.contains(ChronoField.YEAR_OF_ERA.toString())
+                    || formatterAsString.contains(ChronoField.YEAR.toString())) {
+                        return new Literal(source, Period.ofYears(1), DataType.DATE_PERIOD);
+                    }
+        } catch (IllegalArgumentException ignored) {}
+        return null;
+    }
 }