elastic · luigidellaquila · Feb 10, 2025 · Jan 29, 2025 · Jan 29, 2025 · Jan 29, 2025
diff --git a/docs/changelog/121156.yaml b/docs/changelog/121156.yaml
@@ -0,0 +1,5 @@
+pr: 121156
+summary: Remove redundant sorts from execution plan
+area: ES|QL
+type: bug
+issues: []
diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec
@@ -1346,3 +1346,50 @@ language_code:integer | language_name:keyword       | country:text
 1                     | English                     | United States of America
 1                     | English                     | null
 ;
+
+
+sortBeforeAndAfterJoin
+required_capability: join_lookup_v12
+required_capability: remove_redundant_sort
+
+FROM employees
+| sort first_name
+| EVAL language_code = languages
+| LOOKUP JOIN languages_lookup ON language_code
+| WHERE emp_no >= 10091 AND emp_no < 10094
+| SORT emp_no
+| KEEP emp_no, language_code, language_name
+;
+
+emp_no:integer | language_code:integer | language_name:keyword
+10091          | 3                     | Spanish
+10092          | 1                     | English
+10093          | 3                     | Spanish
+;
+
+
+
+sortBeforeAndAfterMultipleJoinAndMvExpand
+required_capability: join_lookup_v12
+required_capability: remove_redundant_sort
+
+FROM employees
+| sort first_name
+| EVAL language_code = languages
+| LOOKUP JOIN languages_lookup ON language_code
+| WHERE emp_no >= 10091 AND emp_no < 10094
+| SORT language_name
+| MV_EXPAND first_name
+| SORT first_name
+| MV_EXPAND last_name
+| SORT last_name
+| LOOKUP JOIN languages_lookup ON language_code
+| SORT emp_no
+| KEEP emp_no, language_code, language_name
+;
+
+emp_no:integer | language_code:integer | language_name:keyword
+10091          | 3                     | Spanish
+10092          | 1                     | English
+10093          | 3                     | Spanish
+;
diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mv_expand.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mv_expand.csv-spec
@@ -404,3 +404,17 @@ from employees | where emp_no == 10003  | mv_expand first_name | keep first_name
 first_name:keyword
 Parto
 ;
+
+
+sortBeforeAndAfterMvExpand
+from employees 
+| sort first_name 
+| mv_expand job_positions 
+| sort emp_no, job_positions 
+| keep emp_no, job_positions 
+| limit 2;
+
+emp_no:integer | job_positions:keyword   
+10001          | Accountant
+10001          | Senior Python Developer          
+;
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java
@@ -791,7 +791,14 @@ public enum Cap {
         /**
          * Support for aggregate_metric_double type
          */
-        AGGREGATE_METRIC_DOUBLE(AGGREGATE_METRIC_DOUBLE_FEATURE_FLAG.isEnabled());
+        AGGREGATE_METRIC_DOUBLE(AGGREGATE_METRIC_DOUBLE_FEATURE_FLAG.isEnabled()),
+
+        /**
+         * Fix for https://github.com/elastic/elasticsearch/issues/120817
+         * and https://github.com/elastic/elasticsearch/issues/120803
+         * Support for queries that have multiple SORTs that cannot become TopN
+         */
+        REMOVE_REDUNDANT_SORT;
 
         private final boolean enabled;
 

diff --git a/...lugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java b/...lugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java
@@ -10,7 +10,6 @@
 import org.elasticsearch.xpack.esql.VerificationException;
 import org.elasticsearch.xpack.esql.common.Failures;
 import org.elasticsearch.xpack.esql.core.type.DataType;
-import org.elasticsearch.xpack.esql.optimizer.rules.logical.AddDefaultTopN;
 import org.elasticsearch.xpack.esql.optimizer.rules.logical.BooleanFunctionEqualsElimination;
 import org.elasticsearch.xpack.esql.optimizer.rules.logical.BooleanSimplification;
 import org.elasticsearch.xpack.esql.optimizer.rules.logical.CombineBinaryComparisons;
@@ -32,7 +31,7 @@
 import org.elasticsearch.xpack.esql.optimizer.rules.logical.PruneEmptyPlans;
 import org.elasticsearch.xpack.esql.optimizer.rules.logical.PruneFilters;
 import org.elasticsearch.xpack.esql.optimizer.rules.logical.PruneLiteralsInOrderBy;
-import org.elasticsearch.xpack.esql.optimizer.rules.logical.PruneOrderByBeforeStats;
+import org.elasticsearch.xpack.esql.optimizer.rules.logical.PruneRedundantOrderBy;
 import org.elasticsearch.xpack.esql.optimizer.rules.logical.PruneRedundantSortClauses;
 import org.elasticsearch.xpack.esql.optimizer.rules.logical.PushDownAndCombineFilters;
 import org.elasticsearch.xpack.esql.optimizer.rules.logical.PushDownAndCombineLimits;
@@ -116,10 +115,9 @@ protected List<Batch<LogicalPlan>> batches() {
 
     protected static List<Batch<LogicalPlan>> rules() {
         var skip = new Batch<>("Skip Compute", new SkipQueryOnLimitZero());
-        var defaultTopN = new Batch<>("Add default TopN", new AddDefaultTopN());
         var label = new Batch<>("Set as Optimized", Limiter.ONCE, new SetAsOptimized());
 
-        return asList(substitutions(), operators(), skip, cleanup(), defaultTopN, label);
+        return asList(substitutions(), operators(), skip, cleanup(), label);
     }
 
     protected static Batch<LogicalPlan> substitutions() {
@@ -189,7 +187,7 @@ protected static Batch<LogicalPlan> operators() {
             new PushDownRegexExtract(),
             new PushDownEnrich(),
             new PushDownAndCombineOrderBy(),
-            new PruneOrderByBeforeStats(),
+            new PruneRedundantOrderBy(),
             new PruneRedundantSortClauses()
         );
     }

diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalVerifier.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalVerifier.java
@@ -27,6 +27,9 @@ public Failures verify(LogicalPlan plan) {
             PlanConsistencyChecker.checkPlan(p, dependencyFailures);
 
             if (failures.hasFailures() == false) {
+                if (p instanceof PostOptimizationVerificationAware pova) {
+                    pova.postOptimizationVerification(failures);
+                }
                 p.forEachExpression(ex -> {
                     if (ex instanceof PostOptimizationVerificationAware va) {
                         va.postOptimizationVerification(failures);

diff --git a/...ql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/AddDefaultTopN.java b/...ql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/AddDefaultTopN.java
diff --git a/...in/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PruneOrderByBeforeStats.java b/...in/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PruneOrderByBeforeStats.java
diff --git a/...main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PruneRedundantOrderBy.java b/...main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PruneRedundantOrderBy.java
@@ -0,0 +1,116 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.esql.optimizer.rules.logical;
+
+import org.elasticsearch.xpack.esql.plan.logical.Aggregate;
+import org.elasticsearch.xpack.esql.plan.logical.Drop;
+import org.elasticsearch.xpack.esql.plan.logical.Enrich;
+import org.elasticsearch.xpack.esql.plan.logical.Eval;
+import org.elasticsearch.xpack.esql.plan.logical.Filter;
+import org.elasticsearch.xpack.esql.plan.logical.InlineStats;
+import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan;
+import org.elasticsearch.xpack.esql.plan.logical.Lookup;
+import org.elasticsearch.xpack.esql.plan.logical.MvExpand;
+import org.elasticsearch.xpack.esql.plan.logical.OrderBy;
+import org.elasticsearch.xpack.esql.plan.logical.Project;
+import org.elasticsearch.xpack.esql.plan.logical.RegexExtract;
+import org.elasticsearch.xpack.esql.plan.logical.Rename;
+import org.elasticsearch.xpack.esql.plan.logical.TopN;
+import org.elasticsearch.xpack.esql.plan.logical.UnaryPlan;
+import org.elasticsearch.xpack.esql.plan.logical.join.Join;
+
+import java.util.ArrayList;
+import java.util.IdentityHashMap;
+import java.util.List;
+
+/**
+ * SORT cannot be executed without a LIMIT, as ES|QL doesn't support unbounded sort (yet).
+ * <p>
+ * The planner tries to push down LIMIT and transform all the unbounded sorts into a TopN.
+ * In some cases it's not possible though, eg.
+ * <p>
+ * from test | sort x | lookup join lookup on x | sort y
+ * <p>
+ * from test | sort x | mv_expand x | sort y
+ * <p>
+ * "sort y" will become a TopN due to the addition of the default Limit, but "sort x" will remain unbounded,
+ * so the query could not be executed.
+ * <p>
+ * In most cases though, following commands can make the previous SORTs redundant,
+ * because it will re-sort previously sorted results (eg. if there is another SORT)
+ * or because the order will be scrambled by another command (eg. a STATS)
+ * <p>
+ * This rule finds and prunes redundant SORTs, attempting to make the plan executable.
+ */
+public class PruneRedundantOrderBy extends OptimizerRules.OptimizerRule<LogicalPlan> {
+
+    /**
+     * When {@code plan} is an {@link OrderBy}, a {@link TopN} or an {@link Aggregate}, looks for
+     * {@link OrderBy} nodes below it whose ordering gets overridden by {@code plan} and removes them.
+     *
+     * @param plan the node currently visited by the optimizer
+     * @return {@code plan} itself when nothing can be pruned, otherwise a copy of the subtree without the redundant sorts
+     */
+    @Override
+    protected LogicalPlan rule(LogicalPlan plan) {
+        if ((plan instanceof OrderBy || plan instanceof TopN || plan instanceof Aggregate) == false) {
+            return plan;
+        }
+        IdentityHashMap<OrderBy, Void> redundant = findRedundantSort(((UnaryPlan) plan).child());
+        if (redundant.isEmpty()) {
+            return plan;
+        }
+        // The redundant nodes are matched by identity, so they have to be looked up before their subtree is rewritten:
+        // a bottom-up transformation re-creates every ancestor of a pruned node, and the re-created OrderBy instances
+        // would no longer be found in the identity map. Going top-down, each node is checked while it is still the
+        // original instance.
+        return plan.transformDown(p -> {
+            LogicalPlan current = p;
+            // directly nested redundant sorts are skipped in one go
+            while (current instanceof OrderBy ob && redundant.containsKey(ob)) {
+                current = ob.child();
+            }
+            return current;
+        });
+    }
+
+    /**
+     * Walks the tree rooted at {@code plan} and collects every {@link OrderBy} that can be reached by crossing
+     * only other {@link OrderBy}s, the unary plans accepted by {@link #isSortAgnostic(UnaryPlan)} and {@link Join}s
+     * (both sides are visited). The walk of a branch stops at the first node of any other kind.
+     *
+     * @param plan the child of the node that makes the previous sorts redundant
+     * @return the collected nodes as keys of an identity map, since the exact instances have to be pruned
+     */
+    private IdentityHashMap<OrderBy, Void> findRedundantSort(LogicalPlan plan) {
+        IdentityHashMap<OrderBy, Void> result = new IdentityHashMap<>();
+
+        // work list of the nodes still to visit; the visiting order is irrelevant for the result
+        List<LogicalPlan> toCheck = new ArrayList<>();
+        toCheck.add(plan);
+
+        while (toCheck.isEmpty() == false) {
+            LogicalPlan p = toCheck.remove(toCheck.size() - 1);
+            if (p instanceof OrderBy ob) {
+                result.put(ob, null);
+                toCheck.add(ob.child());
+            } else if (p instanceof UnaryPlan unary) {
+                if (isSortAgnostic(unary)) {
+                    toCheck.add(unary.child());
+                }
+                // else stop here, other unary plans could be sensitive to SORT
+            } else if (p instanceof Join lj) {
+                toCheck.add(lj.left());
+                toCheck.add(lj.right());
+            }
+            // else stop here, other (non-unary) plans could be sensitive to SORT
+        }
+        return result;
+    }
+
+    /**
+     * Tells whether the walk for redundant sorts is allowed to continue below the given unary plan.
+     */
+    private static boolean isSortAgnostic(UnaryPlan unary) {
+        return unary instanceof Project
+            || unary instanceof Drop
+            || unary instanceof Rename
+            || unary instanceof MvExpand
+            || unary instanceof Enrich
+            || unary instanceof RegexExtract
+            || unary instanceof InlineStats
+            || unary instanceof Lookup
+            // IMPORTANT
+            // If we introduce window functions or order-sensitive aggs (eg. STREAMSTATS),
+            // the previous sort could actually become relevant
+            // so we have to be careful with plans that could use them, ie. the following
+            || unary instanceof Filter
+            || unary instanceof Eval
+            || unary instanceof Aggregate;
+    }
+}
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/OrderBy.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/OrderBy.java
@@ -10,6 +10,7 @@
 import org.elasticsearch.common.io.stream.StreamInput;
 import org.elasticsearch.common.io.stream.StreamOutput;
 import org.elasticsearch.xpack.esql.capabilities.PostAnalysisVerificationAware;
+import org.elasticsearch.xpack.esql.capabilities.PostOptimizationVerificationAware;
 import org.elasticsearch.xpack.esql.capabilities.TelemetryAware;
 import org.elasticsearch.xpack.esql.common.Failures;
 import org.elasticsearch.xpack.esql.core.capabilities.Resolvables;
@@ -25,7 +26,7 @@
 
 import static org.elasticsearch.xpack.esql.common.Failure.fail;
 
-public class OrderBy extends UnaryPlan implements PostAnalysisVerificationAware, TelemetryAware {
+public class OrderBy extends UnaryPlan implements PostAnalysisVerificationAware, PostOptimizationVerificationAware, TelemetryAware {
     public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(LogicalPlan.class, "OrderBy", OrderBy::new);
 
     private final List<Order> order;
@@ -109,4 +110,9 @@ public void postAnalysisVerification(Failures failures) {
             }
         });
     }
+
+    @Override
+    public void postOptimizationVerification(Failures failures) {
+        failures.add(fail(this, "Unbounded sort not supported yet, please add a limit"));
+    }
 }