From b5215da22fec1bb72a44458f902216ef375545e0 Mon Sep 17 00:00:00 2001 From: Bogdan Pintea Date: Fri, 15 Aug 2025 17:19:41 +0200 Subject: [PATCH 1/5] Fix planning of dangling Project with InlineJoin This fixes a planning error leaving a Project on top of an empty LocalRelation marker unpruned. The empty EmptyRelation is added in the plan as a marker, signaling that the right hand-side of the join can be pruned entirely (and thus the entire InlineJoin). --- .../src/main/resources/inlinestats.csv-spec | 39 +++ .../rules/logical/PropagateEmptyRelation.java | 3 +- .../optimizer/rules/logical/PruneColumns.java | 28 +- .../plan/logical/local/LocalRelation.java | 6 +- .../optimizer/LogicalPlanOptimizerTests.java | 240 ++++++++++++++++-- 5 files changed, 284 insertions(+), 32 deletions(-) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/inlinestats.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/inlinestats.csv-spec index b90135f43a417..c825ac23d5ef0 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/inlinestats.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/inlinestats.csv-spec @@ -1386,6 +1386,45 @@ total_duration:long | day:date | days:date 17016205 |2023-10-23T13:00:00.000Z|[2023-10-23T12:00:00.000Z, 2023-10-23T13:00:00.000Z] ; +evalBeforeDoubleInlinestats1 +required_capability: inlinestats_v9 + +FROM employees +| EVAL salaryK = salary/1000 +| INLINESTATS count = COUNT(*) BY salaryK +| INLINESTATS min = MIN(MV_COUNT(languages)) BY salaryK +| SORT emp_no +| KEEP emp_no, still_hired, count +| LIMIT 5 +; + +emp_no:integer |still_hired:boolean|count:long +10001 |true |1 +10002 |true |3 +10003 |false |2 +10004 |true |2 +10005 |true |1 +; + +evalBeforeDoubleInlinestats2 +required_capability: inlinestats_v9 + +FROM employees +| EVAL jobs = MV_COUNT(job_positions) +| INLINESTATS count = COUNT(*) BY jobs +| INLINESTATS min = MIN(MV_COUNT(languages)) BY jobs +| SORT emp_no +| KEEP emp_no, jobs, count, min +| LIMIT 5 +; + +emp_no:integer |jobs:integer|count:long|min:long +10001 |2 |18 |1 +10002 |1 |27 |1 +10003 |null |11 |1 +10004 |4 |26 |1 +10005 |null |11 |1 +; evalBeforeInlinestatsAndKeepAfter1 required_capability: inlinestats_v9 diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PropagateEmptyRelation.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PropagateEmptyRelation.java index 200a0a9269575..b79bcfaefeb8f 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PropagateEmptyRelation.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PropagateEmptyRelation.java @@ -20,7 +20,6 @@ import org.elasticsearch.xpack.esql.plan.logical.Aggregate; import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; import org.elasticsearch.xpack.esql.plan.logical.UnaryPlan; -import org.elasticsearch.xpack.esql.plan.logical.local.EmptyLocalSupplier; import org.elasticsearch.xpack.esql.plan.logical.local.LocalRelation; import org.elasticsearch.xpack.esql.plan.logical.local.LocalSupplier; import org.elasticsearch.xpack.esql.planner.PlannerUtils; @@ -37,7 +36,7 @@ public PropagateEmptyRelation() { @Override protected LogicalPlan rule(UnaryPlan plan, LogicalOptimizerContext ctx) { LogicalPlan p = plan; - if (plan.child() instanceof LocalRelation local && local.supplier() == EmptyLocalSupplier.EMPTY) { + if (plan.child() instanceof LocalRelation local && local.hasEmptySupplier()) { // only care about non-grouped aggs might return something (count) if (plan instanceof Aggregate agg && agg.groupings().isEmpty()) { List emptyBlocks = aggsFromEmpty(ctx.foldCtx(), agg.aggregates()); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PruneColumns.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PruneColumns.java index 2855df8d9aa41..2112b4450393b 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PruneColumns.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PruneColumns.java @@ -14,7 +14,6 @@ import org.elasticsearch.xpack.esql.core.expression.Attribute; import org.elasticsearch.xpack.esql.core.expression.AttributeSet; import org.elasticsearch.xpack.esql.core.expression.Expressions; -import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; import org.elasticsearch.xpack.esql.core.expression.NamedExpression; import org.elasticsearch.xpack.esql.core.util.Holder; import org.elasticsearch.xpack.esql.plan.logical.Aggregate; @@ -79,7 +78,7 @@ private static LogicalPlan pruneColumns(LogicalPlan plan, AttributeSet.Builder u recheck.set(false); p = switch (p) { case Aggregate agg -> pruneColumnsInAggregate(agg, used, inlineJoin); - case InlineJoin inj -> pruneColumnsInInlineJoin(inj, used, recheck); + case InlineJoin inj -> pruneColumnsInInlineJoinRight(inj, used, recheck); case Eval eval -> pruneColumnsInEval(eval, used, recheck); case Project project -> inlineJoin ? pruneColumnsInProject(project, used) : p; case EsRelation esr -> pruneColumnsInEsRelation(esr, used); @@ -148,12 +147,12 @@ private static LogicalPlan pruneColumnsInAggregate(Aggregate aggregate, Attribut return p; } - private static LogicalPlan pruneColumnsInInlineJoin(InlineJoin ij, AttributeSet.Builder used, Holder recheck) { + private static LogicalPlan pruneColumnsInInlineJoinRight(InlineJoin ij, AttributeSet.Builder used, Holder recheck) { LogicalPlan p = ij; used.addAll(ij.references()); var right = pruneColumns(ij.right(), used, true); - if (right.output().isEmpty()) { + if (right.output().isEmpty() || isLocalEmptyRelation(right)) { p = ij.left(); recheck.set(true); } else if (right != ij.right()) { @@ -181,18 +180,19 @@ private static LogicalPlan pruneColumnsInEval(Eval eval, AttributeSet.Builder us return p; } + // Note: only run when the Project is a descendent of an InlineJoin. private static LogicalPlan pruneColumnsInProject(Project project, AttributeSet.Builder used) { LogicalPlan p = project; + // A project atop a stub relation will only output attributes which are already part of the INLINEJOIN left-hand side output. + if (project.child() instanceof StubRelation) { + // Use an empty relation as a marker for a subsequent pass over the InlineJoin. + return emptyLocalRelation(project); + } + var remaining = pruneUnusedAndAddReferences(project.projections(), used); if (remaining != null) { - p = remaining.isEmpty() || remaining.stream().allMatch(FieldAttribute.class::isInstance) - ? emptyLocalRelation(project) - : new Project(project.source(), project.child(), remaining); - } else if (project.output().stream().allMatch(FieldAttribute.class::isInstance)) { - // Use empty relation as a marker for a subsequent pass, in case the project is only outputting field attributes (which are - // already part of the INLINEJOIN left-hand side output). - p = emptyLocalRelation(project); + p = remaining.isEmpty() ? emptyLocalRelation(project) : new Project(project.source(), project.child(), remaining); } return p; @@ -216,7 +216,11 @@ private static LogicalPlan pruneColumnsInEsRelation(EsRelation esr, AttributeSet private static LogicalPlan emptyLocalRelation(LogicalPlan plan) { // create an empty local relation with no attributes - return new LocalRelation(plan.source(), List.of(), EmptyLocalSupplier.EMPTY); + return new LocalRelation(plan.source(), plan.output(), EmptyLocalSupplier.EMPTY); + } + + private static boolean isLocalEmptyRelation(LogicalPlan plan) { + return plan instanceof LocalRelation local && local.hasEmptySupplier(); } /** diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/local/LocalRelation.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/local/LocalRelation.java index 7a83fd800ab8e..a086ed89660a2 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/local/LocalRelation.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/local/LocalRelation.java @@ -55,7 +55,7 @@ public void writeTo(StreamOutput out) throws IOException { if (out.getTransportVersion().onOrAfter(TransportVersions.ESQL_LOCAL_RELATION_WITH_NEW_BLOCKS)) { out.writeNamedWriteable(supplier); } else { - if (supplier == EmptyLocalSupplier.EMPTY) { + if (hasEmptySupplier()) { out.writeVInt(0); } else {// here we can only have an ImmediateLocalSupplier as this was the only implementation apart from EMPTY ((ImmediateLocalSupplier) supplier).writeTo(out); @@ -77,6 +77,10 @@ public LocalSupplier supplier() { return supplier; } + public boolean hasEmptySupplier() { + return supplier == EmptyLocalSupplier.EMPTY; + } + @Override public boolean expressionsResolved() { return true; diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java index e3f13fc331cdc..d182dc1f2197e 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java @@ -6106,12 +6106,10 @@ private static boolean releaseBuildForInlinestats(String query) { return false; } - /** - *
{@code
+    /*
      * Project[[emp_no{f}#12 AS x#8, emp_no{f}#12]]
      * \_TopN[[Order[emp_no{f}#12,ASC,LAST]],1[INTEGER]]
      *   \_EsRelation[test][_meta_field{f}#18, emp_no{f}#12, first_name{f}#13, ..]
-     * }
*/ public void testInlinestatsGetsPrunedEntirely() { var query = """ @@ -6134,7 +6132,60 @@ public void testInlinestatsGetsPrunedEntirely() { var relation = as(topN.child(), EsRelation.class); } - // same as above + /* + * EsqlProject[[emp_no{f}#16, count{r}#7]] + * \_TopN[[Order[emp_no{f}#16,ASC,LAST]],5[INTEGER]] + * \_InlineJoin[LEFT,[salaryK{r}#5],[salaryK{r}#5],[salaryK{r}#5]] + * |_Eval[[salary{f}#21 / 1000[INTEGER] AS salaryK#5]] + * | \_EsRelation[test][_meta_field{f}#22, emp_no{f}#16, first_name{f}#17, ..] + * \_Aggregate[[salaryK{r}#5],[COUNT(*[KEYWORD],true[BOOLEAN]) AS count#7, salaryK{r}#5]] + * \_StubRelation[[_meta_field{f}#22, emp_no{f}#16, first_name{f}#17, gender{f}#18, hire_date{f}#23, job{f}#24, job.raw{f}#25, + * languages{f}#19, last_name{f}#20, long_noidx{f}#26, salary{f}#21, salaryK{r}#5]] + */ + public void testDoubleInlinestatsWithEvalGetsPrunedEntirely() { + var query = """ + FROM employees + | SORT languages DESC + | EVAL salaryK = salary/1000 + | INLINESTATS count = COUNT(*) BY salaryK + | INLINESTATS min = MIN(MV_COUNT(languages)) BY salaryK + | KEEP emp_no, count + | SORT emp_no + | LIMIT 5 + """; + if (releaseBuildForInlinestats(query)) { + return; + } + var plan = optimizedPlan(query); + System.err.println("XXX\n" + plan); + + var project = as(plan, EsqlProject.class); + assertThat(Expressions.names(project.projections()), is(List.of("emp_no", "count"))); + var topN = as(project.child(), TopN.class); + assertThat(topN.order().size(), is(1)); + var order = as(topN.order().get(0), Order.class); + assertThat(order.direction(), equalTo(Order.OrderDirection.ASC)); + assertThat(order.nullsPosition(), equalTo(Order.NullsPosition.LAST)); + var ref = as(order.child(), FieldAttribute.class); + assertThat(ref.name(), is("emp_no")); + var inlineJoin = as(topN.child(), InlineJoin.class); + assertThat(Expressions.names(inlineJoin.config().matchFields()), is(List.of("salaryK"))); + // Left + var eval = as(inlineJoin.left(), Eval.class); + assertThat(Expressions.names(eval.fields()), is(List.of("salaryK"))); + var relation = as(eval.child(), EsRelation.class); + // Right + var agg = as(inlineJoin.right(), Aggregate.class); + assertThat(Expressions.names(agg.groupings()), is(List.of("salaryK"))); + assertThat(Expressions.names(agg.aggregates()), is(List.of("count", "salaryK"))); + var stub = as(agg.child(), StubRelation.class); + } + + /* + * Project[[emp_no{f}#19 AS x#15, emp_no{f}#19]] + * \_TopN[[Order[emp_no{f}#19,ASC,LAST]],1[INTEGER]] + * \_EsRelation[test][_meta_field{f}#25, emp_no{f}#19, first_name{f}#20, ..] + */ public void testDoubleInlinestatsGetsPrunedEntirely() { var query = """ FROM employees @@ -6158,8 +6209,7 @@ public void testDoubleInlinestatsGetsPrunedEntirely() { var relation = as(topN.child(), EsRelation.class); } - /** - *
{@code
+    /*
      * Project[[emp_no{f}#15 AS x#11, a{r}#7, emp_no{f}#15]]
      * \_Limit[1[INTEGER],true]
      *   \_InlineJoin[LEFT,[emp_no{f}#15],[emp_no{f}#15],[emp_no{r}#15]]
@@ -6168,7 +6218,6 @@ public void testDoubleInlinestatsGetsPrunedEntirely() {
      *     \_Aggregate[[emp_no{f}#15],[COUNTDISTINCT(languages{f}#18,true[BOOLEAN]) AS a#7, emp_no{f}#15]]
      *       \_StubRelation[[_meta_field{f}#21, emp_no{f}#15, first_name{f}#16, gender{f}#17, hire_date{f}#22, job{f}#23, job.raw{f}#24, l
      *          anguages{f}#18, last_name{f}#19, long_noidx{f}#25, salary{f}#20]]
-     * }
*/ public void testInlinestatsGetsPrunedPartially() { var query = """ @@ -6199,7 +6248,7 @@ public void testInlinestatsGetsPrunedPartially() { } // same as above - public void testTrippleInlinestatsGetsPrunedPartially() { + public void testTripleInlinestatsGetsPrunedPartially() { var query = """ FROM employees | INLINESTATS x = AVG(salary), a = COUNT_DISTINCT(languages) BY emp_no @@ -6229,14 +6278,175 @@ public void testTrippleInlinestatsGetsPrunedPartially() { var stub = as(agg.child(), StubRelation.class); } - /** - *
{@code
+    /*
+     * EsqlProject[[emp_no{f}#864, salaryK{r}#836, count{r}#838, min{r}#852]]
+     * \_TopN[[Order[emp_no{f}#864,ASC,LAST]],5[INTEGER]]
+     *   \_InlineJoin[LEFT,[salaryK{r}#836],[salaryK{r}#836],[salaryK{r}#836]]
+     *     |_Dissect[hire_date_string{r}#842,Parser[pattern=%{date}, appendSeparator=,
+     *          parser=org.elasticsearch.dissect.DissectParser@27d57d5e],[date{r}#847]] <-- TODO: Dissect & Eval could/should be dropped
+     *     | \_Eval[[TOSTRING(hire_date{f}#865) AS hire_date_string#842]]
+     *     |   \_InlineJoin[LEFT,[salaryK{r}#836],[salaryK{r}#836],[salaryK{r}#836]]
+     *     |     |_Eval[[salary{f}#866 / 10000[INTEGER] AS salaryK#836]]
+     *     |     | \_EsRelation[employees][emp_no{f}#864, hire_date{f}#865, languages{f}#860, ..]
+     *     |     \_Aggregate[[salaryK{r}#836],[COUNT(*[KEYWORD],true[BOOLEAN]) AS count#838, salaryK{r}#836]]
+     *     |       \_StubRelation[[emp_no{f}#864, hire_date{f}#865, languages{f}#860, languages.byte{f}#861, languages.long{f}#863,
+     *                  languages.short{f}#862, salary{f}#866, salaryK{r}#836]]
+     *     \_Aggregate[[salaryK{r}#836],[MIN($$MV_COUNT(langua>$MIN$0{r$}#867,true[BOOLEAN]) AS min#852, salaryK{r}#836]]
+     *       \_Eval[[MVCOUNT(languages{f}#860) AS $$MV_COUNT(langua>$MIN$0#867]]
+     *         \_StubRelation[[emp_no{f}#864, hire_date{f}#865, languages{f}#860, languages.byte{f}#861, languages.long{f}#863,
+     *              languages.short{f}#862, salary{f}#866, count{r}#838, salaryK{r}#836, sum{r}#845, hire_date_string{r}#842, date{r}#847]]
+     */
+    public void testTripleInlinestatsMultipleAssignmentsGetsPrunedPartially() {
+        // TODO: reenable 1st sort, pull the 2nd further up when #132417 is in
+        var query = """
+            FROM employees
+            // | SORT languages DESC
+            | EVAL salaryK = salary / 10000
+            | INLINESTATS count = COUNT(*) BY salaryK
+            | EVAL hire_date_string = hire_date::keyword
+            | INLINESTATS sum = SUM(languages) BY hire_date_string
+            | DISSECT hire_date_string "%{date}"
+            | INLINESTATS min = MIN(MV_COUNT(languages)) BY salaryK
+            | SORT emp_no
+            | KEEP emp_no, salaryK, count, min
+            | LIMIT 5
+            """;
+        if (releaseBuildForInlinestats(query)) {
+            return;
+        }
+        var plan = optimizedPlan(query);
+
+        var employeesFields = List.of(
+            "_meta_field",
+            "emp_no",
+            "first_name",
+            "gender",
+            "hire_date",
+            "job",
+            "job.raw",
+            "languages",
+            "last_name",
+            "long_noidx",
+            "salary"
+        );
+
+        var project = as(plan, EsqlProject.class);
+        assertThat(Expressions.names(project.projections()), is(List.of("emp_no", "salaryK", "count", "min")));
+        var topN = as(project.child(), TopN.class);
+        var outerinline = as(topN.child(), InlineJoin.class);
+        //
+        var expectedOutterOutput = new ArrayList<>(employeesFields);
+        expectedOutterOutput.addAll(List.of("count", "hire_date_string", "date", "min", "salaryK"));
+        assertThat(Expressions.names(outerinline.output()), is(expectedOutterOutput));
+        // outer left
+        var dissect = as(outerinline.left(), Dissect.class);
+        var eval = as(dissect.child(), Eval.class);
+        var innerinline = as(eval.child(), InlineJoin.class);
+        var expectedInnerOutput = new ArrayList<>(employeesFields);
+        expectedInnerOutput.addAll(List.of("count", "salaryK"));
+        assertThat(Expressions.names(innerinline.output()), is(expectedInnerOutput));
+        // inner left
+        eval = as(innerinline.left(), Eval.class);
+        var relation = as(eval.child(), EsRelation.class);
+        // inner right
+        var agg = as(innerinline.right(), Aggregate.class);
+        var stub = as(agg.child(), StubRelation.class);
+        // outer right
+        agg = as(outerinline.right(), Aggregate.class);
+        eval = as(agg.child(), Eval.class);
+        stub = as(eval.child(), StubRelation.class);
+    }
+
+    /*
+     * EsqlProject[[emp_no{f}#917]]
+     * \_TopN[[Order[emp_no{f}#917,ASC,LAST]],5[INTEGER]]
+     *   \_Dissect[hire_date_string{r}#898,Parser[pattern=%{date}, appendSeparator=,
+     *          parser=org.elasticsearch.dissect.DissectParser@46132aa7],[date{r}#903]] <-- TODO: Dissect & Eval could/should be dropped
+     *     \_Eval[[TOSTRING(hire_date{f}#918) AS hire_date_string#898]]
+     *       \_EsRelation[employees][emp_no{f}#917, hire_date{f}#918, languages{f}#913, ..]
+     */
+    public void testTripleInlinestatsMultipleAssignmentsGetsPrunedEntirely() {
+        // same as the above query, but only keep emp_no
+        var query = """
+            FROM employees
+            // | SORT languages DESC
+            | EVAL salaryK = salary / 10000
+            | INLINESTATS count = COUNT(*) BY salaryK
+            | EVAL hire_date_string = hire_date::keyword
+            | INLINESTATS sum = SUM(languages) BY hire_date_string
+            | DISSECT hire_date_string "%{date}"
+            | INLINESTATS min = MIN(MV_COUNT(languages)) BY salaryK
+            | SORT emp_no
+            | KEEP emp_no
+            | LIMIT 5
+            """;
+        if (releaseBuildForInlinestats(query)) {
+            return;
+        }
+        var plan = optimizedPlan(query);
+
+        var project = as(plan, EsqlProject.class);
+        assertThat(Expressions.names(project.projections()), is(List.of("emp_no")));
+        var topN = as(project.child(), TopN.class);
+        var dissect = as(topN.child(), Dissect.class);
+        var eval = as(dissect.child(), Eval.class);
+        var relation = as(eval.child(), EsRelation.class);
+    }
+
+    /*
+     * Project[[emp_no{f}#1556]]
+     * \_TopN[[Order[emp_no{f}#1556,ASC,LAST]],5[INTEGER]]
+     *   \_Join[LEFT,[languages{f}#1552],[languages{f}#1552],[language_code{f}#1561]]
+     *     |_Join[LEFT,[languages{f}#1552],[languages{f}#1552],[language_code{f}#1560]]
+     *     | |_Join[LEFT,[languages{f}#1552],[languages{f}#1552],[language_code{f}#1559]]
+     *     | | |_EsRelation[employees][emp_no{f}#1556, hire_date{f}#1557, languages{f}#155..]
+     *     | | \_EsRelation[languages_lookup][LOOKUP][language_code{f}#1559]
+     *     | \_EsRelation[languages_lookup][LOOKUP][language_code{f}#1560]
+     *     \_EsRelation[languages_lookup][LOOKUP][language_code{f}#1561]
+     */
+    public void testTripleInlinestatsWithLookupJoinGetsPrunedEntirely() {
+        var query = """
+            FROM employees
+            // | SORT languages DESC
+            | EVAL salaryK = salary / 10000
+            | EVAL language_code = languages
+            | LOOKUP JOIN languages_lookup ON language_code
+            | INLINESTATS count = COUNT(*) BY salaryK
+            | EVAL hire_date_string = hire_date::keyword
+            | LOOKUP JOIN languages_lookup ON language_code
+            | INLINESTATS sum = SUM(languages) BY hire_date_string
+            | LOOKUP JOIN languages_lookup ON language_code
+            | INLINESTATS min = MIN(MV_COUNT(languages)) BY salaryK
+            | SORT emp_no
+            | KEEP emp_no
+            | LIMIT 5
+            """;
+        if (releaseBuildForInlinestats(query)) {
+            return;
+        }
+        var plan = optimizedPlan(query);
+
+        var project = as(plan, Project.class);
+        assertThat(Expressions.names(project.projections()), is(List.of("emp_no")));
+        var topN = as(project.child(), TopN.class);
+
+        var outterjoin = as(topN.child(), Join.class);
+        var middlejoin = as(outterjoin.left(), Join.class);
+        var innerjoin = as(middlejoin.left(), Join.class);
+
+        var innerJoinLeftRelation = as(innerjoin.left(), EsRelation.class);
+        var innerJoinRightRelation = as(innerjoin.right(), EsRelation.class);
+
+        var middleJoinRightRelation = as(middlejoin.right(), EsRelation.class);
+        var outerJoinRightRelation = as(outterjoin.right(), EsRelation.class);
+    }
+
+    /*
      * Project[[abbrev{f}#19, scalerank{f}#21 AS backup_scalerank#4, language_name{f}#28 AS scalerank#11]]
      * \_TopN[[Order[abbrev{f}#19,DESC,FIRST]],5[INTEGER]]
      *   \_Join[LEFT,[scalerank{f}#21],[scalerank{f}#21],[language_code{f}#27]]
      *     |_EsRelation[airports][abbrev{f}#19, city{f}#25, city_location{f}#26, coun..]
      *     \_EsRelation[languages_lookup][LOOKUP][language_code{f}#27, language_name{f}#28]
-     * }
*/ public void testInlinestatsWithLookupJoin() { var query = """ @@ -6273,8 +6483,7 @@ public void testInlinestatsWithLookupJoin() { assertThat(right.concreteIndices(), is(Set.of("languages_lookup"))); } - /** - *
{@code
+    /*
      * EsqlProject[[avg{r}#4, emp_no{f}#9, first_name{f}#10]]
      * \_Limit[10[INTEGER],true]
      *   \_InlineJoin[LEFT,[emp_no{f}#9],[emp_no{f}#9],[emp_no{r}#9]]
@@ -6286,7 +6495,6 @@ public void testInlinestatsWithLookupJoin() {
      *              avg$1#21, emp_no{f}#9]]
      *           \_StubRelation[[_meta_field{f}#15, emp_no{f}#9, first_name{f}#10, gender{f}#11, hire_date{f}#16, job{f}#17, job.raw{f}#18,
      *              languages{f}#12, last_name{f}#13, long_noidx{f}#19, salary{f}#14]]
-     * }
*/ public void testInlinestatsWithAvg() { var query = """ @@ -6318,13 +6526,11 @@ public void testInlinestatsWithAvg() { var stub = as(agg.child(), StubRelation.class); } - /** - *
{@code
+    /*
      * EsqlProject[[emp_no{r}#5]]
      * \_Limit[1000[INTEGER],false]
      *   \_LocalRelation[[salary{r}#3, emp_no{r}#5, gender{r}#7],
      *      org.elasticsearch.xpack.esql.plan.logical.local.CopyingLocalSupplier@9d5b596d]
-     * }
*/ public void testInlinestatsWithRow() { var query = """ From 1faee2ab8a958b4751a03940e073cfb953743fdc Mon Sep 17 00:00:00 2001 From: Bogdan Pintea Date: Fri, 15 Aug 2025 18:03:18 +0200 Subject: [PATCH 2/5] remove leftover --- .../xpack/esql/optimizer/LogicalPlanOptimizerTests.java | 1 - 1 file changed, 1 deletion(-) diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java index d182dc1f2197e..3beafe08db602 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java @@ -6157,7 +6157,6 @@ public void testDoubleInlinestatsWithEvalGetsPrunedEntirely() { return; } var plan = optimizedPlan(query); - System.err.println("XXX\n" + plan); var project = as(plan, EsqlProject.class); assertThat(Expressions.names(project.projections()), is(List.of("emp_no", "count"))); From 6f32325294614b5d1f0d60c63f7b3919629d92ae Mon Sep 17 00:00:00 2001 From: Bogdan Pintea Date: Fri, 15 Aug 2025 21:29:14 +0200 Subject: [PATCH 3/5] update type interestingly, it fails in CsvTests, but not in EsqlSpecIT, which accepts MIN(MV_COUNT(integer)) as both long and integer type. --- .../qa/testFixtures/src/main/resources/inlinestats.csv-spec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/inlinestats.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/inlinestats.csv-spec index c825ac23d5ef0..aeac03fe76b47 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/inlinestats.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/inlinestats.csv-spec @@ -1418,7 +1418,7 @@ FROM employees | LIMIT 5 ; -emp_no:integer |jobs:integer|count:long|min:long +emp_no:integer |jobs:integer|count:long|min:integer 10001 |2 |18 |1 10002 |1 |27 |1 10003 |null |11 |1 From de48c456ec94cb79ac2f00828af10b2d8c6b249f Mon Sep 17 00:00:00 2001 From: Bogdan Pintea Date: Tue, 19 Aug 2025 20:17:56 +0200 Subject: [PATCH 4/5] Simplify further the pruning The case where an agg on the right hand-side of an InlineJoin was pruned emitted unnecessary nodes in the case when only groups remained available. --- .../optimizer/rules/logical/PruneColumns.java | 27 +----- .../optimizer/LogicalPlanOptimizerTests.java | 95 +++++++++++++++++++ 2 files changed, 98 insertions(+), 24 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PruneColumns.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PruneColumns.java index 2112b4450393b..d4b580739c3e3 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PruneColumns.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PruneColumns.java @@ -25,7 +25,6 @@ import org.elasticsearch.xpack.esql.plan.logical.Project; import org.elasticsearch.xpack.esql.plan.logical.Sample; import org.elasticsearch.xpack.esql.plan.logical.join.InlineJoin; -import org.elasticsearch.xpack.esql.plan.logical.join.StubRelation; import org.elasticsearch.xpack.esql.plan.logical.local.EmptyLocalSupplier; import org.elasticsearch.xpack.esql.plan.logical.local.LocalRelation; import org.elasticsearch.xpack.esql.plan.logical.local.LocalSupplier; @@ -122,23 +121,9 @@ private static LogicalPlan pruneColumnsInAggregate(Aggregate aggregate, Attribut } else { // not expecting high groups cardinality, nested loops in lists should be fine, no need for a HashSet if (inlineJoin && aggregate.groupings().containsAll(remaining)) { - // It's an INLINEJOIN and all remaining attributes are groupings, which are already part of the IJ output (from the - // left-hand side). - // TODO: INLINESTATS: revisit condition when adding support for INLINESTATS filters - if (aggregate.child() instanceof StubRelation stub) { - var message = "Aggregate groups references [" - + remaining - + "] not in child's (StubRelation) output: [" - + stub.outputSet() - + "]"; - assert stub.outputSet().containsAll(Expressions.asAttributes(remaining)) : message; - - p = emptyLocalRelation(aggregate); - } else { - // There are no aggregates to compute, just output the groupings; these are already in the IJ output, so only - // restrict the output to what remained. - p = new Project(aggregate.source(), aggregate.child(), remaining); - } + // An INLINEJOIN right-hand side aggregation output had everything pruned, except for (some of the) groupings, which are + // already part of the IJ output (from the left-hand side): the agg can just be dropped entirely. + p = emptyLocalRelation(aggregate); } else { // not an INLINEJOIN or there are actually aggregates to compute p = aggregate.with(aggregate.groupings(), remaining); } @@ -184,12 +169,6 @@ private static LogicalPlan pruneColumnsInEval(Eval eval, AttributeSet.Builder us private static LogicalPlan pruneColumnsInProject(Project project, AttributeSet.Builder used) { LogicalPlan p = project; - // A project atop a stub relation will only output attributes which are already part of the INLINEJOIN left-hand side output. - if (project.child() instanceof StubRelation) { - // Use an empty relation as a marker for a subsequent pass over the InlineJoin. - return emptyLocalRelation(project); - } - var remaining = pruneUnusedAndAddReferences(project.projections(), used); if (remaining != null) { p = remaining.isEmpty() ? emptyLocalRelation(project) : new Project(project.source(), project.child(), remaining); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java index 3beafe08db602..c68ea6dbb8348 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java @@ -6440,6 +6440,101 @@ public void testTripleInlinestatsWithLookupJoinGetsPrunedEntirely() { var outerJoinRightRelation = as(outterjoin.right(), EsRelation.class); } + /* + * Project[[avg{r}#14, decades{r}#10]] + * \_Eval[[$$SUM$avg$0{r$}#35 / $$COUNT$avg$1{r$}#36 AS avg#14]] + * \_Limit[1000[INTEGER],false] + * \_Aggregate[[decades{r}#10],[SUM(salary{f}#29,true[BOOLEAN],compensated[KEYWORD]) AS $$SUM$avg$0#35, COUNT(salary{f}#29, + * true[BOOLEAN]) AS $$COUNT$avg$1#36, decades{r}#10]] + * \_Eval[[DATEDIFF(years[KEYWORD],hire_date{f}#31,1755625790494[DATETIME]) AS age#4, age{r}#4 / 10[INTEGER] AS decades#7, + * decades{r}#7 * 10[INTEGER] AS decades#10]] + * \_EsRelation[test][_meta_field{f}#30, emp_no{f}#24, first_name{f}#25, ..] + */ + public void testInlineStatsWithAggGetsPrunedEntirely() { + var query = """ + FROM employees + | EVAL age = DATE_DIFF("years", hire_date, NOW()) + | EVAL decades = age / 10, decades = decades * 10 + | STATS avg = AVG(salary) BY decades + | EVAL idecades = decades / 2 + | INLINESTATS iavg = AVG(avg) BY idecades + | KEEP avg, decades + """; + + if (releaseBuildForInlinestats(query)) { + return; + } + var plan = optimizedPlan(query); + + var project = as(plan, Project.class); + assertThat(Expressions.names(project.projections()), is(List.of("avg", "decades"))); + var eval = as(project.child(), Eval.class); + var limit = asLimit(eval.child(), 1000, false); + var aggregate = as(limit.child(), Aggregate.class); + eval = as(aggregate.child(), Eval.class); + var source = as(eval.child(), EsRelation.class); + } + + /* + * EsqlProject[[avg{r}#1053, decades{r}#1049, avgavg{r}#1063]] + * \_Limit[1000[INTEGER],true] + * \_InlineJoin[LEFT,[],[],[]] + * |_Project[[avg{r}#1053, decades{r}#1049, idecades{r}#1056]] + * | \_Eval[[$$SUM$avg$0{r$}#1073 / $$COUNT$avg$1{r$}#1074 AS avg#1053, decades{r}#1049 / 2[INTEGER] AS idecades#1056]] + * | \_Limit[1000[INTEGER],false] + * | \_Aggregate[[decades{r}#1049],[SUM(salary{f}#1072,true[BOOLEAN],compensated[KEYWORD]) AS $$SUM$avg$0#1073, + * COUNT(salary{f}#1072,true[BOOLEAN]) AS $$COUNT$avg$1#1074, decades{r}#1049]] + * | \_Eval[[DATEDIFF(years[KEYWORD],birth_date{f}#1071,1755626308505[DATETIME]) AS age#1043, age{r}#1043 / 10[INTEGER] AS + * decades#1046, decades{r}#1046 * 10[INTEGER] AS decades#1049]] + * | \_EsRelation[employees][birth_date{f}#1071, salary{f}#1072] + * \_Project[[avgavg{r}#1063]] + * \_Eval[[$$SUM$avgavg$0{r$}#1077 / $$COUNT$avgavg$1{r$}#1078 AS avgavg#1063]] + * \_Aggregate[[],[SUM(avg{r}#1053,true[BOOLEAN],compensated[KEYWORD]) AS $$SUM$avgavg$0#1077, + * COUNT(avg{r}#1053,true[BOOLEAN]) AS $$COUNT$avgavg$1#1078]] + * \_StubRelation[[avg{r}#1053, decades{r}#1049, iavg{r}#1059, idecades{r}#1056]] + */ + public void testInlineStatsWithAggAndInlineStatsGetsPruned() { + var query = """ + FROM employees + | EVAL age = DATE_DIFF("years", hire_date, NOW()) + | EVAL decades = age / 10, decades = decades * 10 + | STATS avg = AVG(salary) BY decades + | EVAL idecades = decades / 2 + | INLINESTATS iavg = AVG(avg) BY idecades + | INLINESTATS avgavg = AVG(avg) + | KEEP avg, decades, avgavg + """; + + if (releaseBuildForInlinestats(query)) { + return; + } + var plan = optimizedPlan(query); + + var project = as(plan, EsqlProject.class); + assertThat(Expressions.names(project.projections()), is(List.of("avg", "decades", "avgavg"))); + var limit = asLimit(project.child(), 1000, true); + var inlineJoin = as(limit.child(), InlineJoin.class); + + // Left branch: Project with avg, decades, idecades + var leftProject = as(inlineJoin.left(), Project.class); + assertThat(Expressions.names(leftProject.projections()), is(List.of("avg", "decades", "idecades"))); + var leftEval = as(leftProject.child(), Eval.class); + var leftLimit = asLimit(leftEval.child(), 1000, false); + var leftAggregate = as(leftLimit.child(), Aggregate.class); + assertThat(Expressions.names(leftAggregate.output()), is(List.of("$$SUM$avg$0", "$$COUNT$avg$1", "decades"))); + var leftEval2 = as(leftAggregate.child(), Eval.class); + var leftRelation = as(leftEval2.child(), EsRelation.class); + + // Right branch: Project with avgavg + var rightProject = as(inlineJoin.right(), Project.class); + assertThat(Expressions.names(rightProject.projections()), is(List.of("avgavg"))); + var rightEval = as(rightProject.child(), Eval.class); + var rightAggregate = as(rightEval.child(), Aggregate.class); + assertThat(Expressions.names(rightAggregate.output()), is(List.of("$$SUM$avgavg$0", "$$COUNT$avgavg$1"))); + assertThat(rightAggregate.groupings(), empty()); + var rightStub = as(rightAggregate.child(), StubRelation.class); + } + /* * Project[[abbrev{f}#19, scalerank{f}#21 AS backup_scalerank#4, language_name{f}#28 AS scalerank#11]] * \_TopN[[Order[abbrev{f}#19,DESC,FIRST]],5[INTEGER]] From d55db438eaa682dc2c2d03cbca28547ca1a15b54 Mon Sep 17 00:00:00 2001 From: Bogdan Pintea Date: Tue, 26 Aug 2025 12:18:14 +0200 Subject: [PATCH 5/5] bump required cap version in tests --- .../qa/testFixtures/src/main/resources/inlinestats.csv-spec | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/inlinestats.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/inlinestats.csv-spec index 43fcb7db36704..bc7732cae4c5f 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/inlinestats.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/inlinestats.csv-spec @@ -1377,7 +1377,7 @@ total_duration:long | day:date | days:date ; evalBeforeDoubleInlinestats1 -required_capability: inlinestats_v9 +required_capability: inlinestats_v10 FROM employees | EVAL salaryK = salary/1000 @@ -1397,7 +1397,7 @@ emp_no:integer |still_hired:boolean|count:long ; evalBeforeDoubleInlinestats2 -required_capability: inlinestats_v9 +required_capability: inlinestats_v10 FROM employees | EVAL jobs = MV_COUNT(job_positions)