diff --git a/docs/changelog/124335.yaml b/docs/changelog/124335.yaml new file mode 100644 index 0000000000000..a7730b9099281 --- /dev/null +++ b/docs/changelog/124335.yaml @@ -0,0 +1,5 @@ +pr: 124335 +summary: Change the order of the optimization rules +area: ES|QL +type: bug +issues: [] diff --git a/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClusterSpecIT.java b/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClusterSpecIT.java index ea80eba92cef8..5610db793abe5 100644 --- a/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClusterSpecIT.java +++ b/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClusterSpecIT.java @@ -48,7 +48,7 @@ import static org.elasticsearch.xpack.esql.EsqlTestUtils.classpathResources; import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.INLINESTATS; import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.INLINESTATS_V2; -import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.INLINESTATS_V4; +import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.INLINESTATS_V5; import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.JOIN_LOOKUP_V12; import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.JOIN_PLANNING_V1; import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.METADATA_FIELDS_REMOTE_TEST; @@ -126,7 +126,7 @@ protected void shouldSkipTest(String testName) throws IOException { assumeFalse("INLINESTATS not yet supported in CCS", testCase.requiredCapabilities.contains(INLINESTATS.capabilityName())); assumeFalse("INLINESTATS not yet supported in CCS", testCase.requiredCapabilities.contains(INLINESTATS_V2.capabilityName())); assumeFalse("INLINESTATS not yet supported in CCS", testCase.requiredCapabilities.contains(JOIN_PLANNING_V1.capabilityName())); - assumeFalse("INLINESTATS not yet supported in CCS", testCase.requiredCapabilities.contains(INLINESTATS_V4.capabilityName())); + assumeFalse("INLINESTATS not yet supported in CCS", testCase.requiredCapabilities.contains(INLINESTATS_V5.capabilityName())); assumeFalse("LOOKUP JOIN not yet supported in CCS", testCase.requiredCapabilities.contains(JOIN_LOOKUP_V12.capabilityName())); // Unmapped fields require a coorect capability response from every cluster, which isn't currently implemented. assumeFalse("UNMAPPED FIELDS not yet supported in CCS", testCase.requiredCapabilities.contains(UNMAPPED_FIELDS.capabilityName())); diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/inlinestats.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/inlinestats.csv-spec index 9843066a68a34..7b64ee6b1c938 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/inlinestats.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/inlinestats.csv-spec @@ -3,7 +3,7 @@ // maxOfInt -required_capability: inlinestats_v4 +required_capability: inlinestats_v5 // tag::max-languages[] FROM employees | KEEP emp_no, languages @@ -25,7 +25,7 @@ emp_no:integer | languages:integer | max_lang:integer ; maxOfIntByKeyword -required_capability: inlinestats_v4 +required_capability: inlinestats_v5 FROM employees | KEEP emp_no, languages, gender @@ -43,7 +43,7 @@ emp_no:integer | languages:integer | max_lang:integer | gender:keyword ; maxOfLongByKeyword -required_capability: inlinestats_v4 +required_capability: inlinestats_v5 FROM employees | KEEP emp_no, avg_worked_seconds, gender @@ -58,7 +58,7 @@ emp_no:integer | avg_worked_seconds:long | max_avg_worked_seconds:long | gender: ; maxOfLong -required_capability: inlinestats_v4 +required_capability: inlinestats_v5 FROM employees | KEEP emp_no, avg_worked_seconds, gender @@ -71,7 +71,7 @@ emp_no:integer | avg_worked_seconds:long | gender:keyword | max_avg_worked_secon ; maxOfLongByCalculatedKeyword -required_capability: inlinestats_v4 +required_capability: inlinestats_v5 // tag::longest-tenured-by-first[] FROM employees @@ -94,7 +94,7 @@ emp_no:integer | avg_worked_seconds:long | last_name:keyword | max_avg_worked_se ; maxOfLongByCalculatedNamedKeyword -required_capability: inlinestats_v4 +required_capability: inlinestats_v5 FROM employees | KEEP emp_no, avg_worked_seconds, last_name @@ -113,7 +113,7 @@ emp_no:integer | avg_worked_seconds:long | last_name:keyword | max_avg_worked_se ; maxOfLongByCalculatedDroppedKeyword -required_capability: inlinestats_v4 +required_capability: inlinestats_v5 FROM employees | INLINESTATS max_avg_worked_seconds = MAX(avg_worked_seconds) BY l = SUBSTRING(last_name, 0, 1) @@ -132,7 +132,7 @@ emp_no:integer | avg_worked_seconds:long | last_name:keyword | max_avg_worked_se ; maxOfLongByEvaledKeyword -required_capability: inlinestats_v4 +required_capability: inlinestats_v5 FROM employees | EVAL l = SUBSTRING(last_name, 0, 1) @@ -152,7 +152,7 @@ emp_no:integer | avg_worked_seconds:long | max_avg_worked_seconds:long | l:keywo ; maxOfLongByInt -required_capability: inlinestats_v4 +required_capability: inlinestats_v5 FROM employees | KEEP emp_no, avg_worked_seconds, languages @@ -170,7 +170,7 @@ emp_no:integer | avg_worked_seconds:long | max_avg_worked_seconds:long | languag ; maxOfLongByIntDouble -required_capability: inlinestats_v4 +required_capability: inlinestats_v5 FROM employees | KEEP emp_no, avg_worked_seconds, languages, height @@ -244,7 +244,7 @@ abbrev:keyword | type:keyword | scalerank:integer | min_scalerank:integer ; byMvExpand -required_capability: inlinestats_v4 +required_capability: inlinestats_v5 // tag::extreme-airports[] FROM airports @@ -308,7 +308,7 @@ count:long | country:keyword | avg:double ; afterWhere -required_capability: inlinestats_v4 +required_capability: inlinestats_v5 FROM airports | WHERE country != "United States" @@ -367,7 +367,7 @@ abbrev:keyword | city:keyword | region:text | "COUNT(*)":long ; beforeStats -required_capability: inlinestats_v4 +required_capability: inlinestats_v5 FROM airports | EVAL lat = ST_Y(location) @@ -380,7 +380,7 @@ northern:long | southern:long ; beforeKeepSort -required_capability: inlinestats_v4 +required_capability: inlinestats_v5 FROM employees | INLINESTATS max_salary = MAX(salary) by languages @@ -501,7 +501,7 @@ Zürich | Zürich ; byConstant -required_capability: inlinestats_v4 +required_capability: inlinestats_v5 FROM employees | KEEP emp_no, languages @@ -520,7 +520,7 @@ emp_no:integer | languages:integer | max_lang:integer | y:integer ; aggConstant -required_capability: inlinestats_v4 +required_capability: inlinestats_v5 FROM employees | KEEP emp_no @@ -538,7 +538,7 @@ one:integer | emp_no:integer ; percentile -required_capability: inlinestats_v4 +required_capability: inlinestats_v5 FROM employees | KEEP emp_no, salary @@ -557,7 +557,7 @@ emp_no:integer | salary:integer | ninety_fifth_salary:double ; byTwoCalculated -required_capability: inlinestats_v4 +required_capability: inlinestats_v5 FROM airports | WHERE abbrev IS NOT NULL @@ -642,7 +642,7 @@ abbrev:keyword | scalerank:integer | location:geo_point ; groupShadowsField -required_capability: inlinestats_v4 +required_capability: inlinestats_v5 FROM employees | KEEP emp_no, salary, hire_date @@ -661,7 +661,7 @@ emp_no:integer | salary:integer | avg_salary:double | hire_date:datetime ; groupByExpression_And_ExistentField -required_capability: inlinestats_v4 +required_capability: inlinestats_v5 FROM employees | KEEP emp_no, languages, gender | EVAL x = "ABC" @@ -678,8 +678,8 @@ emp_no:integer | languages:integer | x:keyword | max_lang:integer | y:keyword | 10005 |1 |ABC |5 |abc |M ; -groupByRenamedColumn-Ignore -required_capability: inlinestats_v4 +groupByRenamedColumn +required_capability: inlinestats_v5 FROM employees | KEEP emp_no, languages, gender | INLINESTATS max_lang = MAX(languages) BY y = gender @@ -695,3 +695,217 @@ emp_no:integer | languages:integer | gender:keyword | max_lang:integer | y:keywo 10012 | 5 | null | 5 | null 10014 | 5 | null | 5 | null ; + +// fails with AssertionError at org.elasticsearch.xpack.esql.plan.logical.Limit.writeTo(Limit.java:70) +groupByMultipleRenamedColumns_AndOneExpression_Last-Ignore + +FROM employees +| KEEP emp_no, languages, gender, first_name +| INLINESTATS max_lang = MAX(languages) BY y = gender, l = languages, f = left(first_name,1) +| LIMIT 10 +; + +emp_no:integer | languages:integer | gender:keyword|first_name:keyword|max_lang:integer| y:keyword | l:integer |f:keyword +10001 |2 |M |Georgi |2 |M |2 |G +10002 |5 |F |Bezalel |5 |F |5 |B +10003 |4 |M |Parto |4 |M |4 |P +10004 |5 |M |Chirstian |5 |M |5 |C +10005 |1 |M |Kyoichi |1 |M |1 |K +10006 |3 |F |Anneke |3 |F |3 |A +10007 |4 |F |Tzvetan |4 |F |4 |T +10008 |2 |M |Saniya |2 |M |2 |S +10009 |1 |F |Sumant |1 |F |1 |S +10010 |4 |null |Duangkaew |4 |null |4 |D +; + +// fails with AssertionError at org.elasticsearch.xpack.esql.plan.logical.Limit.writeTo(Limit.java:70) +groupByMultipleRenamedColumns_AndTwoExpressions-Ignore + +FROM employees +| KEEP emp_no, languages, gender, first_name +| INLINESTATS max_lang = MAX(languages) BY f1 = left(first_name, 1), y = gender, f2 = left(first_name, 1), l = languages +| LIMIT 10 +; + +emp_no:integer | languages:integer | gender:keyword|first_name:keyword|max_lang:integer| f1:keyword | y:keyword | f2:keyword |l:integer +10001 |2 |M |Georgi |2 |G |M |G |2 +10002 |5 |F |Bezalel |5 |B |F |B |5 +10003 |4 |M |Parto |4 |P |M |P |4 +10004 |5 |M |Chirstian |5 |C |M |C |5 +10005 |1 |M |Kyoichi |1 |K |M |K |1 +10006 |3 |F |Anneke |3 |A |F |A |3 +10007 |4 |F |Tzvetan |4 |T |F |T |4 +10008 |2 |M |Saniya |2 |S |M |S |2 +10009 |1 |F |Sumant |1 |S |F |S |1 +10010 |4 |null |Duangkaew |4 |D |null |D |4 +; + +// fails with AssertionError at org.elasticsearch.xpack.esql.plan.logical.Limit.writeTo(Limit.java:70) +groupByMultipleRenamedColumns_AndMultipleRenames-Ignore + +FROM employees +| KEEP emp_no, languages, gender, first_name +| RENAME first_name as f +| INLINESTATS max_lang = MAX(languages) BY y = gender, l = languages, first_name = left(f, 1) +| LIMIT 10 +; + +emp_no:integer | languages:integer | gender:keyword| f:keyword |max_lang:integer| y:keyword | l:integer |first_name:keyword +10001 |2 |M |Georgi |2 |M |2 |G +10002 |5 |F |Bezalel |5 |F |5 |B +10003 |4 |M |Parto |4 |M |4 |P +10004 |5 |M |Chirstian |5 |M |5 |C +10005 |1 |M |Kyoichi |1 |M |1 |K +10006 |3 |F |Anneke |3 |F |3 |A +10007 |4 |F |Tzvetan |4 |F |4 |T +10008 |2 |M |Saniya |2 |M |2 |S +10009 |1 |F |Sumant |1 |F |1 |S +10010 |4 |null |Duangkaew |4 |null |4 |D +; + +// fails with AssertionError at org.elasticsearch.xpack.esql.plan.logical.Limit.writeTo(Limit.java:70) +groupByMultipleRenamedColumns_AndSameNameExpressionGroupingOverride-Ignore + +FROM employees +| KEEP emp_no, languages, gender, first_name +| RENAME first_name as f +| INLINESTATS max_lang = MAX(languages) BY y = gender, l = languages, f = left(f, 1) +| LIMIT 10 +; + +emp_no:integer | languages:integer | gender:keyword|max_lang:integer| y:keyword | l:integer |f:keyword +10001 |2 |M |2 |M |2 |G +10002 |5 |F |5 |F |5 |B +10003 |4 |M |4 |M |4 |P +10004 |5 |M |5 |M |5 |C +10005 |1 |M |1 |M |1 |K +10006 |3 |F |3 |F |3 |A +10007 |4 |F |4 |F |4 |T +10008 |2 |M |2 |M |2 |S +10009 |1 |F |1 |F |1 |S +10010 |4 |null |4 |null |4 |D +; + +twoAggregatesGroupedBy_AField_And_AnExpression +required_capability: inlinestats_v5 + +FROM employees +| KEEP emp_no, languages, gender, last_name +| WHERE gender IS NOT NULL +| INLINESTATS max_lang = MAX(languages), min_lang = MIN(languages) BY f = left(last_name, 1), gender +| SORT last_name DESC +| LIMIT 8 +; + +emp_no:integer |languages:integer|last_name:keyword|max_lang:integer|min_lang:integer| f:keyword | gender:keyword +10053 |3 |Zschoche |4 |3 |Z |F +10083 |1 |Zockler |1 |1 |Z |M +10007 |4 |Zielinski |4 |3 |Z |F +10097 |3 |Waschkowski |3 |3 |W |M +10020 |null |Warwick |3 |3 |W |M +10043 |1 |Tzvieli |1 |1 |T |M +10049 |5 |Tramer |5 |5 |T |F +10028 |null |Tempesti |1 |1 |T |M +; + +groupByMultipleRenamedColumns_InversedOrder +required_capability: inlinestats_v5 + +FROM employees +| KEEP emp_no, languages, still_hired, gender +| INLINESTATS max_lang = MAX(languages) BY y = gender, z = still_hired +| SORT emp_no ASC +| LIMIT 5 +; + +emp_no:integer |languages:integer|still_hired:boolean| gender:keyword|max_lang:integer| y:keyword | z:boolean +10001 |2 |true |M |5 |M |true +10002 |5 |true |F |5 |F |true +10003 |4 |false |M |5 |M |false +10004 |5 |true |M |5 |M |true +10005 |1 |true |M |5 |M |true +; + +groupByMultipleRenamedColumns_InversedOrder_ComplexEval +required_capability: inlinestats_v5 + +FROM employees +| KEEP emp_no, languages, still_hired, gender +| EVAL multilingual = CASE(languages <= 1, "monolingual", languages <= 2, "bilingual", "polyglot") +| INLINESTATS max_lang = MAX(languages) BY y = gender, z = still_hired +| SORT emp_no ASC +| LIMIT 5 +; + +emp_no:integer |languages:integer|still_hired:boolean| gender:keyword|multilingual:keyword|max_lang:integer| y:keyword | z:boolean +10001 |2 |true |M |bilingual |5 |M |true +10002 |5 |true |F |polyglot |5 |F |true +10003 |4 |false |M |polyglot |5 |M |false +10004 |5 |true |M |polyglot |5 |M |true +10005 |1 |true |M |monolingual |5 |M |true +; + +groupByMultipleRenamedColumns_AndComplexEval +required_capability: inlinestats_v5 + +FROM employees +| KEEP emp_no, languages, still_hired, gender +| EVAL multilingual = CASE(languages <= 1, "monolingual", languages <= 2, "bilingual", "polyglot") +| INLINESTATS max_lang = MAX(languages) BY y = gender, z = still_hired, m = multilingual +| SORT emp_no ASC +| LIMIT 5 +; + +emp_no:integer |languages:integer|still_hired:boolean| gender:keyword|multilingual:keyword|max_lang:integer| y:keyword | z:boolean |m:keyword +10001 |2 |true |M |bilingual |2 |M |true |bilingual +10002 |5 |true |F |polyglot |5 |F |true |polyglot +10003 |4 |false |M |polyglot |5 |M |false |polyglot +10004 |5 |true |M |polyglot |5 |M |true |polyglot +10005 |1 |true |M |monolingual |1 |M |true |monolingual +; + +// fails with AssertionError at org.elasticsearch.xpack.esql.plan.logical.Limit.writeTo(Limit.java:70) +groupByMultipleRenamedColumns_AndConstantValue-Ignore + +FROM employees +| KEEP emp_no, languages, gender, first_name +| EVAL x = "ABC" +| INLINESTATS max_lang = MAX(languages) BY y = gender, l = languages, f = to_lower(x) +| LIMIT 10 +; + +emp_no:integer |languages:integer|gender:keyword |first_name:keyword | x:keyword |max_lang:integer| y:keyword | l:integer |f:keyword +10001 |2 |M |Georgi |ABC |2 |M |2 |abc +10002 |5 |F |Bezalel |ABC |5 |F |5 |abc +10003 |4 |M |Parto |ABC |4 |M |4 |abc +10004 |5 |M |Chirstian |ABC |5 |M |5 |abc +10005 |1 |M |Kyoichi |ABC |1 |M |1 |abc +10006 |3 |F |Anneke |ABC |3 |F |3 |abc +10007 |4 |F |Tzvetan |ABC |4 |F |4 |abc +10008 |2 |M |Saniya |ABC |2 |M |2 |abc +10009 |1 |F |Sumant |ABC |1 |F |1 |abc +10010 |4 |null |Duangkaew |ABC |4 |null |4 |abc +; + +groupByRenamedExpression +required_capability: inlinestats_v5 + +FROM employees +| KEEP emp_no, languages, gender, last_name +| WHERE gender IS NOT NULL +| INLINESTATS max_lang = MAX(languages), min_lang = MIN(languages) BY f = left(last_name, 1), gender +| SORT last_name DESC +| LIMIT 8 +; + +emp_no:integer |languages:integer|last_name:keyword|max_lang:integer|min_lang:integer| f:keyword | gender:keyword +10053 |3 |Zschoche |4 |3 |Z |F +10083 |1 |Zockler |1 |1 |Z |M +10007 |4 |Zielinski |4 |3 |Z |F +10097 |3 |Waschkowski |3 |3 |W |M +10020 |null |Warwick |3 |3 |W |M +10043 |1 |Tzvieli |1 |1 |T |M +10049 |5 |Tramer |5 |5 |T |F +10028 |null |Tempesti |1 |1 |T |M +; + diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index 0fa12d8d7d4c9..c9d42f007ffc9 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -845,7 +845,7 @@ public enum Cap { * Fixes a series of issues with inlinestats which had an incomplete implementation after lookup and inlinestats * were refactored. */ - INLINESTATS_V4(EsqlPlugin.INLINESTATS_FEATURE_FLAG), + INLINESTATS_V5(EsqlPlugin.INLINESTATS_FEATURE_FLAG), /** * Support partial_results diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java index 5fcf7d35b4760..1be66890c6429 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java @@ -140,13 +140,15 @@ protected static Batch substitutions() { // translate metric aggregates after surrogate substitution and replace nested expressions with eval (again) new TranslateMetricsAggregate(), new ReplaceAggregateNestedExpressionWithEval(), + // this one needs to be placed before ReplaceAliasingEvalWithProject, so that any potential aliasing eval (eval x = y) + // is not replaced with a Project before the eval to be copied on the left hand side of an InlineJoin + new PropagateInlineEvals(), new ReplaceRegexMatch(), new ReplaceTrivialTypeConversions(), new ReplaceAliasingEvalWithProject(), new SkipQueryOnEmptyMappings(), new SubstituteSpatialSurrogates(), - new ReplaceOrderByExpressionWithEval(), - new PropagateInlineEvals() + new ReplaceOrderByExpressionWithEval() // new NormalizeAggregate(), - waits on https://github.com/elastic/elasticsearch/issues/100634 ); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PropagateInlineEvals.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PropagateInlineEvals.java index d5f131f9f9cef..1d921a5037b6f 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PropagateInlineEvals.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PropagateInlineEvals.java @@ -34,19 +34,18 @@ protected LogicalPlan rule(InlineJoin plan) { // check if there's any grouping that uses a reference on the right side // if so, look for the source until finding a StubReference // then copy those on the left side as well - LogicalPlan left = plan.left(); LogicalPlan right = plan.right(); // grouping references List groupingAlias = new ArrayList<>(); + // TODO: replace this with AttributeSet Map groupingRefs = new LinkedHashMap<>(); // perform only one iteration that does two things // first checks any aggregate that declares expressions inside the grouping // second that checks any found references to collect their declaration right = right.transformDown(p -> { - if (p instanceof Aggregate aggregate) { // collect references for (Expression g : aggregate.groupings()) { @@ -56,24 +55,26 @@ protected LogicalPlan rule(InlineJoin plan) { } } + if (groupingRefs.isEmpty()) { + return p; + } + // find their declaration and remove it - // TODO: this doesn't take into account aliasing if (p instanceof Eval eval) { - if (groupingRefs.size() > 0) { - List fields = eval.fields(); - List remainingEvals = new ArrayList<>(fields.size()); - for (Alias f : fields) { - if (groupingRefs.remove(f.name()) != null) { - groupingAlias.add(f); - } else { - remainingEvals.add(f); - } - } - if (remainingEvals.size() != fields.size()) { - // if all fields are moved, replace the eval - p = remainingEvals.size() == 0 ? eval.child() : new Eval(eval.source(), eval.child(), remainingEvals); + List fields = eval.fields(); + List remainingEvals = new ArrayList<>(fields.size()); + for (Alias f : fields) { + // TODO: look into identifying refs by their NameIds instead + if (groupingRefs.remove(f.name()) != null) { + groupingAlias.add(f); + } else { + remainingEvals.add(f); } } + if (remainingEvals.size() != fields.size()) { + // if all fields are moved, replace the eval + p = remainingEvals.size() == 0 ? eval.child() : new Eval(eval.source(), eval.child(), remainingEvals); + } } return p; }); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java index 90c08001f4cfa..fb3a2651b5060 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java @@ -214,8 +214,8 @@ public class LogicalPlanOptimizerTests extends ESTestCase { private static EnrichResolution enrichResolution; private static final LiteralsOnTheRight LITERALS_ON_THE_RIGHT = new LiteralsOnTheRight(); - private static class SubstitutionOnlyOptimizer extends LogicalPlanOptimizer { - static SubstitutionOnlyOptimizer INSTANCE = new SubstitutionOnlyOptimizer(unboundLogicalOptimizerContext()); + public static class SubstitutionOnlyOptimizer extends LogicalPlanOptimizer { + public static SubstitutionOnlyOptimizer INSTANCE = new SubstitutionOnlyOptimizer(unboundLogicalOptimizerContext()); SubstitutionOnlyOptimizer(LogicalOptimizerContext optimizerContext) { super(optimizerContext); @@ -6078,10 +6078,6 @@ public void testSimplifyComparisonArithmeticWithFloatsAndDirectionChange() { doTestSimplifyComparisonArithmetics("float * -2 < 4", "float", GT, -2d); } - private void assertNullLiteral(Expression expression) { - assertNull(as(expression, Literal.class).value()); - } - @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/108519") public void testSimplifyComparisonArithmeticSkippedOnIntegerArithmeticalOverflow() { assertNotSimplified("integer - 1 " + randomBinaryComparison() + " " + Long.MAX_VALUE); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PropagateInlineEvalsTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PropagateInlineEvalsTests.java new file mode 100644 index 0000000000000..3cbfe49d46a33 --- /dev/null +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PropagateInlineEvalsTests.java @@ -0,0 +1,174 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.optimizer.rules.logical; + +import org.elasticsearch.index.IndexMode; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xpack.esql.EsqlTestUtils; +import org.elasticsearch.xpack.esql.analysis.Analyzer; +import org.elasticsearch.xpack.esql.analysis.AnalyzerContext; +import org.elasticsearch.xpack.esql.analysis.EnrichResolution; +import org.elasticsearch.xpack.esql.core.expression.Expressions; +import org.elasticsearch.xpack.esql.core.expression.ReferenceAttribute; +import org.elasticsearch.xpack.esql.core.type.EsField; +import org.elasticsearch.xpack.esql.expression.function.EsqlFunctionRegistry; +import org.elasticsearch.xpack.esql.index.EsIndex; +import org.elasticsearch.xpack.esql.index.IndexResolution; +import org.elasticsearch.xpack.esql.optimizer.LogicalPlanOptimizer; +import org.elasticsearch.xpack.esql.optimizer.LogicalPlanOptimizerTests; +import org.elasticsearch.xpack.esql.parser.EsqlParser; +import org.elasticsearch.xpack.esql.plan.logical.Aggregate; +import org.elasticsearch.xpack.esql.plan.logical.EsRelation; +import org.elasticsearch.xpack.esql.plan.logical.Eval; +import org.elasticsearch.xpack.esql.plan.logical.Limit; +import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; +import org.elasticsearch.xpack.esql.plan.logical.join.InlineJoin; +import org.elasticsearch.xpack.esql.plan.logical.join.StubRelation; +import org.elasticsearch.xpack.esql.plan.logical.local.EsqlProject; +import org.junit.BeforeClass; + +import java.util.List; +import java.util.Map; + +import static org.elasticsearch.xpack.esql.EsqlTestUtils.TEST_VERIFIER; +import static org.elasticsearch.xpack.esql.EsqlTestUtils.as; +import static org.elasticsearch.xpack.esql.EsqlTestUtils.loadMapping; +import static org.elasticsearch.xpack.esql.EsqlTestUtils.withDefaultLimitWarning; +import static org.elasticsearch.xpack.esql.analysis.AnalyzerTestUtils.defaultLookupResolution; +import static org.hamcrest.Matchers.contains; +import static org.hamcrest.Matchers.hasSize; +import static org.hamcrest.Matchers.is; + +public class PropagateInlineEvalsTests extends ESTestCase { + + private static EsqlParser parser; + private static Map mapping; + private static Analyzer analyzer; + + @BeforeClass + public static void init() { + parser = new EsqlParser(); + mapping = loadMapping("mapping-basic.json"); + EsIndex test = new EsIndex("test", mapping, Map.of("test", IndexMode.STANDARD)); + IndexResolution getIndexResult = IndexResolution.valid(test); + analyzer = new Analyzer( + new AnalyzerContext( + EsqlTestUtils.TEST_CFG, + new EsqlFunctionRegistry(), + getIndexResult, + defaultLookupResolution(), + new EnrichResolution() + ), + TEST_VERIFIER + ); + } + + /** + * Expects after running the {@link LogicalPlanOptimizer#substitutions()}: + * + * Limit[1000[INTEGER],false] + * \_InlineJoin[LEFT,[y{r}#10],[y{r}#10],[y{r}#10]] + * |_Eval[[gender{f}#13 AS y]] + * | \_EsqlProject[[emp_no{f}#11, languages{f}#14, gender{f}#13]] + * | \_EsRelation[test][_meta_field{f}#17, emp_no{f}#11, first_name{f}#12, ..] + * \_Aggregate[STANDARD,[y{r}#10],[MAX(languages{f}#14,true[BOOLEAN]) AS max_lang, y{r}#10]] + * \_StubRelation[[emp_no{f}#11, languages{f}#14, gender{f}#13, y{r}#10]] + */ + public void testGroupingAliasingMoved_To_LeftSideOfJoin() { + var plan = plan(""" + from test + | keep emp_no, languages, gender + | inlinestats max_lang = MAX(languages) BY y = gender + """, LogicalPlanOptimizerTests.SubstitutionOnlyOptimizer.INSTANCE); + + var limit = as(plan, Limit.class); + var inline = as(limit.child(), InlineJoin.class); + var leftEval = as(inline.left(), Eval.class); + var project = as(leftEval.child(), EsqlProject.class); + + assertThat(Expressions.names(project.projections()), contains("emp_no", "languages", "gender")); + + as(project.child(), EsRelation.class); + var rightAgg = as(inline.right(), Aggregate.class); + var stubRelation = as(rightAgg.child(), StubRelation.class); + assertThat(Expressions.names(stubRelation.expressions()), contains("emp_no", "languages", "gender", "y")); + + var groupings = rightAgg.groupings(); + var aggs = rightAgg.aggregates(); + var ref = as(groupings.get(0), ReferenceAttribute.class); + assertThat(aggs.get(1), is(ref)); + assertThat(leftEval.fields(), hasSize(1)); + assertThat(leftEval.fields().get(0).toAttribute(), is(ref)); // the only grouping is passed as eval on the join's left hand side + assertThat(leftEval.fields().get(0).name(), is("y")); + } + + /** + * Expects after running the {@link LogicalPlanOptimizer#substitutions()}: + * Limit[1000[INTEGER],false] + * \_InlineJoin[LEFT,[f{r}#18, g{r}#21, first_name_l{r}#9],[f{r}#18, g{r}#21, first_name_l{r}#9],[f{r}#18, g{r}#21, first_name_l{ + * r}#9]] + * |_Eval[[LEFT(last_name{f}#27,1[INTEGER]) AS f, gender{f}#25 AS g]] + * | \_Eval[[LEFT(first_name{f}#24,1[INTEGER]) AS first_name_l]] + * | \_EsqlProject[[emp_no{f}#23, languages{f}#26, gender{f}#25, last_name{f}#27, first_name{f}#24]] + * | \_EsRelation[test][_meta_field{f}#29, emp_no{f}#23, first_name{f}#24, ..] + * \_Aggregate[STANDARD,[f{r}#18, g{r}#21, first_name_l{r}#9],[MAX(languages{f}#26,true[BOOLEAN]) AS max_lang, MIN(languages{f} + * #26,true[BOOLEAN]) AS min_lang, f{r}#18, g{r}#21, first_name_l{r}#9]] + * \_StubRelation[[emp_no{f}#23, languages{f}#26, gender{f}#25, last_name{f}#27, first_name{f}#24, first_name_l{r}#9, f{r}#18, g + * {r}#21]] + */ + public void testGroupingAliasingMoved_To_LeftSideOfJoin_WithExpression() { + var plan = plan(""" + from test + | keep emp_no, languages, gender, last_name, first_name + | eval first_name_l = left(first_name, 1) + | inlinestats max_lang = MAX(languages), min_lang = MIN(languages) BY f = left(last_name, 1), g = gender, first_name_l + """, LogicalPlanOptimizerTests.SubstitutionOnlyOptimizer.INSTANCE); + + var limit = as(plan, Limit.class); + var inline = as(limit.child(), InlineJoin.class); + var leftEval1 = as(inline.left(), Eval.class); + var leftEval2 = as(leftEval1.child(), Eval.class); + var project = as(leftEval2.child(), EsqlProject.class); + + assertThat(Expressions.names(project.projections()), contains("emp_no", "languages", "gender", "last_name", "first_name")); + + as(project.child(), EsRelation.class); + var rightAgg = as(inline.right(), Aggregate.class); + var stubRelation = as(rightAgg.child(), StubRelation.class); + assertThat( + Expressions.names(stubRelation.expressions()), + contains("emp_no", "languages", "gender", "last_name", "first_name", "first_name_l", "f", "g") + ); + + var groupings = rightAgg.groupings(); + assertThat(groupings, hasSize(3)); + var aggs = rightAgg.aggregates(); + var ref1 = as(groupings.get(0), ReferenceAttribute.class); // f = left(last_name, 1) + var ref2 = as(groupings.get(1), ReferenceAttribute.class); // g = gender + var ref3 = as(groupings.get(2), ReferenceAttribute.class); // first_name_l + assertThat(aggs.get(2), is(ref1)); + assertThat(aggs.get(3), is(ref2)); + assertThat(leftEval1.fields(), hasSize(2)); + assertThat(leftEval1.fields().get(0).toAttribute(), is(ref1)); // f = left(last_name, 1) + assertThat(leftEval1.fields().get(0).name(), is("f")); + assertThat(leftEval1.fields().get(1).toAttribute(), is(ref2)); // g = gender + assertThat(leftEval1.fields().get(1).name(), is("g")); + assertThat(leftEval2.fields(), hasSize(1)); + assertThat(leftEval2.fields().get(0).toAttribute(), is(ref3)); // first_name_l is in the second eval (the one the user added) + assertThat(leftEval2.fields().get(0).name(), is("first_name_l")); + } + + private LogicalPlan plan(String query, LogicalPlanOptimizer optimizer) { + return optimizer.optimize(analyzer.analyze(parser.createStatement(query))); + } + + @Override + protected List filteredWarnings() { + return withDefaultLimitWarning(super.filteredWarnings()); + } +}