-
Notifications
You must be signed in to change notification settings - Fork 25.6k
ESQL: Change the order of the optimization rules #124335
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
9828afc
8843569
a2c33fa
18ebafe
8beb022
26f2349
68c31b7
16311c2
a036535
89e76ef
eddde2d
dc8c35d
ef8303c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -3,7 +3,7 @@ | |
| // | ||
|
|
||
| maxOfInt | ||
| required_capability: inlinestats_v4 | ||
| required_capability: inlinestats_v5 | ||
| // tag::max-languages[] | ||
| FROM employees | ||
| | KEEP emp_no, languages | ||
|
|
@@ -25,7 +25,7 @@ emp_no:integer | languages:integer | max_lang:integer | |
| ; | ||
|
|
||
| maxOfIntByKeyword | ||
| required_capability: inlinestats_v4 | ||
| required_capability: inlinestats_v5 | ||
|
|
||
| FROM employees | ||
| | KEEP emp_no, languages, gender | ||
|
|
@@ -43,7 +43,7 @@ emp_no:integer | languages:integer | max_lang:integer | gender:keyword | |
| ; | ||
|
|
||
| maxOfLongByKeyword | ||
| required_capability: inlinestats_v4 | ||
| required_capability: inlinestats_v5 | ||
|
|
||
| FROM employees | ||
| | KEEP emp_no, avg_worked_seconds, gender | ||
|
|
@@ -58,7 +58,7 @@ emp_no:integer | avg_worked_seconds:long | max_avg_worked_seconds:long | gender: | |
| ; | ||
|
|
||
| maxOfLong | ||
| required_capability: inlinestats_v4 | ||
| required_capability: inlinestats_v5 | ||
|
|
||
| FROM employees | ||
| | KEEP emp_no, avg_worked_seconds, gender | ||
|
|
@@ -71,7 +71,7 @@ emp_no:integer | avg_worked_seconds:long | gender:keyword | max_avg_worked_secon | |
| ; | ||
|
|
||
| maxOfLongByCalculatedKeyword | ||
| required_capability: inlinestats_v4 | ||
| required_capability: inlinestats_v5 | ||
|
|
||
| // tag::longest-tenured-by-first[] | ||
| FROM employees | ||
|
|
@@ -94,7 +94,7 @@ emp_no:integer | avg_worked_seconds:long | last_name:keyword | max_avg_worked_se | |
| ; | ||
|
|
||
| maxOfLongByCalculatedNamedKeyword | ||
| required_capability: inlinestats_v4 | ||
| required_capability: inlinestats_v5 | ||
|
|
||
| FROM employees | ||
| | KEEP emp_no, avg_worked_seconds, last_name | ||
|
|
@@ -113,7 +113,7 @@ emp_no:integer | avg_worked_seconds:long | last_name:keyword | max_avg_worked_se | |
| ; | ||
|
|
||
| maxOfLongByCalculatedDroppedKeyword | ||
| required_capability: inlinestats_v4 | ||
| required_capability: inlinestats_v5 | ||
|
|
||
| FROM employees | ||
| | INLINESTATS max_avg_worked_seconds = MAX(avg_worked_seconds) BY l = SUBSTRING(last_name, 0, 1) | ||
|
|
@@ -132,7 +132,7 @@ emp_no:integer | avg_worked_seconds:long | last_name:keyword | max_avg_worked_se | |
| ; | ||
|
|
||
| maxOfLongByEvaledKeyword | ||
| required_capability: inlinestats_v4 | ||
| required_capability: inlinestats_v5 | ||
|
|
||
| FROM employees | ||
| | EVAL l = SUBSTRING(last_name, 0, 1) | ||
|
|
@@ -152,7 +152,7 @@ emp_no:integer | avg_worked_seconds:long | max_avg_worked_seconds:long | l:keywo | |
| ; | ||
|
|
||
| maxOfLongByInt | ||
| required_capability: inlinestats_v4 | ||
| required_capability: inlinestats_v5 | ||
|
|
||
| FROM employees | ||
| | KEEP emp_no, avg_worked_seconds, languages | ||
|
|
@@ -170,7 +170,7 @@ emp_no:integer | avg_worked_seconds:long | max_avg_worked_seconds:long | languag | |
| ; | ||
|
|
||
| maxOfLongByIntDouble | ||
| required_capability: inlinestats_v4 | ||
| required_capability: inlinestats_v5 | ||
|
|
||
| FROM employees | ||
| | KEEP emp_no, avg_worked_seconds, languages, height | ||
|
|
@@ -244,7 +244,7 @@ abbrev:keyword | type:keyword | scalerank:integer | min_scalerank:integer | |
| ; | ||
|
|
||
| byMvExpand | ||
| required_capability: inlinestats_v4 | ||
| required_capability: inlinestats_v5 | ||
|
|
||
| // tag::extreme-airports[] | ||
| FROM airports | ||
|
|
@@ -308,7 +308,7 @@ count:long | country:keyword | avg:double | |
| ; | ||
|
|
||
| afterWhere | ||
| required_capability: inlinestats_v4 | ||
| required_capability: inlinestats_v5 | ||
|
|
||
| FROM airports | ||
| | WHERE country != "United States" | ||
|
|
@@ -367,7 +367,7 @@ abbrev:keyword | city:keyword | region:text | "COUNT(*)":long | |
| ; | ||
|
|
||
| beforeStats | ||
| required_capability: inlinestats_v4 | ||
| required_capability: inlinestats_v5 | ||
|
|
||
| FROM airports | ||
| | EVAL lat = ST_Y(location) | ||
|
|
@@ -380,7 +380,7 @@ northern:long | southern:long | |
| ; | ||
|
|
||
| beforeKeepSort | ||
| required_capability: inlinestats_v4 | ||
| required_capability: inlinestats_v5 | ||
|
|
||
| FROM employees | ||
| | INLINESTATS max_salary = MAX(salary) by languages | ||
|
|
@@ -501,7 +501,7 @@ Zürich | Zürich | |
| ; | ||
|
|
||
| byConstant | ||
| required_capability: inlinestats_v4 | ||
| required_capability: inlinestats_v5 | ||
|
|
||
| FROM employees | ||
| | KEEP emp_no, languages | ||
|
|
@@ -520,7 +520,7 @@ emp_no:integer | languages:integer | max_lang:integer | y:integer | |
| ; | ||
|
|
||
| aggConstant | ||
| required_capability: inlinestats_v4 | ||
| required_capability: inlinestats_v5 | ||
|
|
||
| FROM employees | ||
| | KEEP emp_no | ||
|
|
@@ -538,7 +538,7 @@ one:integer | emp_no:integer | |
| ; | ||
|
|
||
| percentile | ||
| required_capability: inlinestats_v4 | ||
| required_capability: inlinestats_v5 | ||
|
|
||
| FROM employees | ||
| | KEEP emp_no, salary | ||
|
|
@@ -557,7 +557,7 @@ emp_no:integer | salary:integer | ninety_fifth_salary:double | |
| ; | ||
|
|
||
| byTwoCalculated | ||
| required_capability: inlinestats_v4 | ||
| required_capability: inlinestats_v5 | ||
|
|
||
| FROM airports | ||
| | WHERE abbrev IS NOT NULL | ||
|
|
@@ -642,7 +642,7 @@ abbrev:keyword | scalerank:integer | location:geo_point | |
| ; | ||
|
|
||
| groupShadowsField | ||
| required_capability: inlinestats_v4 | ||
| required_capability: inlinestats_v5 | ||
|
|
||
| FROM employees | ||
| | KEEP emp_no, salary, hire_date | ||
|
|
@@ -661,7 +661,7 @@ emp_no:integer | salary:integer | avg_salary:double | hire_date:datetime | |
| ; | ||
|
|
||
| groupByExpression_And_ExistentField | ||
| required_capability: inlinestats_v4 | ||
| required_capability: inlinestats_v5 | ||
| FROM employees | ||
| | KEEP emp_no, languages, gender | ||
| | EVAL x = "ABC" | ||
|
|
@@ -678,8 +678,8 @@ emp_no:integer | languages:integer | x:keyword | max_lang:integer | y:keyword | | |
| 10005 |1 |ABC |5 |abc |M | ||
| ; | ||
|
|
||
| groupByRenamedColumn-Ignore | ||
| required_capability: inlinestats_v4 | ||
| groupByRenamedColumn | ||
| required_capability: inlinestats_v5 | ||
| FROM employees | ||
| | KEEP emp_no, languages, gender | ||
| | INLINESTATS max_lang = MAX(languages) BY y = gender | ||
|
|
@@ -695,3 +695,115 @@ emp_no:integer | languages:integer | gender:keyword | max_lang:integer | y:keywo | |
| 10012 | 5 | null | 5 | null | ||
| 10014 | 5 | null | 5 | null | ||
| ; | ||
|
|
||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Some of the following tests fail with the same error. This is likely due to something that @alex-spies caught in a previous review, meaning too much is sent to the data nodes for processing, instead of being done on the coordinator. This is also tied to scenarios where multiple |
||
| // fails with AssertionError at org.elasticsearch.xpack.esql.plan.logical.Limit.writeTo(Limit.java:70) | ||
| groupByMultipleRenamedColumns_AndOneExpression_Last-Ignore | ||
|
|
||
| FROM employees | ||
| | KEEP emp_no, languages, gender, first_name | ||
| | INLINESTATS max_lang = MAX(languages) BY y = gender, l = languages, f = left(first_name,1) | ||
| | LIMIT 10 | ||
| ; | ||
|
|
||
| emp_no:integer | languages:integer | gender:keyword|first_name:keyword|max_lang:integer| y:keyword | l:integer |f:keyword | ||
| 10001 |2 |M |Georgi |2 |M |2 |G | ||
| 10002 |5 |F |Bezalel |5 |F |5 |B | ||
| 10003 |4 |M |Parto |4 |M |4 |P | ||
| 10004 |5 |M |Chirstian |5 |M |5 |C | ||
| 10005 |1 |M |Kyoichi |1 |M |1 |K | ||
| 10006 |3 |F |Anneke |3 |F |3 |A | ||
| 10007 |4 |F |Tzvetan |4 |F |4 |T | ||
| 10008 |2 |M |Saniya |2 |M |2 |S | ||
| 10009 |1 |F |Sumant |1 |F |1 |S | ||
| 10010 |4 |null |Duangkaew |4 |null |4 |D | ||
| ; | ||
|
|
||
| // fails with AssertionError at org.elasticsearch.xpack.esql.plan.logical.Limit.writeTo(Limit.java:70) | ||
| groupByMultipleRenamedColumns_AndTwoExpressions-Ignore | ||
|
|
||
| FROM employees | ||
| | KEEP emp_no, languages, gender, first_name | ||
| | INLINESTATS max_lang = MAX(languages) BY f1 = left(first_name, 1), y = gender, f2 = left(first_name, 1), l = languages | ||
| | LIMIT 10 | ||
| ; | ||
|
|
||
| emp_no:integer | languages:integer | gender:keyword|first_name:keyword|max_lang:integer| f1:keyword | y:keyword | f2:keyword |l:integer | ||
| 10001 |2 |M |Georgi |2 |G |M |G |2 | ||
| 10002 |5 |F |Bezalel |5 |B |F |B |5 | ||
| 10003 |4 |M |Parto |4 |P |M |P |4 | ||
| 10004 |5 |M |Chirstian |5 |C |M |C |5 | ||
| 10005 |1 |M |Kyoichi |1 |K |M |K |1 | ||
| 10006 |3 |F |Anneke |3 |A |F |A |3 | ||
| 10007 |4 |F |Tzvetan |4 |T |F |T |4 | ||
| 10008 |2 |M |Saniya |2 |S |M |S |2 | ||
| 10009 |1 |F |Sumant |1 |S |F |S |1 | ||
| 10010 |4 |null |Duangkaew |4 |D |null |D |4 | ||
| ; | ||
|
|
||
| // fails with AssertionError at org.elasticsearch.xpack.esql.plan.logical.Limit.writeTo(Limit.java:70) | ||
| groupByMultipleRenamedColumns_AndMultipleRenames-Ignore | ||
|
|
||
| FROM employees | ||
| | KEEP emp_no, languages, gender, first_name | ||
| | RENAME first_name as f | ||
| | INLINESTATS max_lang = MAX(languages) BY y = gender, l = languages, first_name = left(f, 1) | ||
| | LIMIT 10 | ||
| ; | ||
|
|
||
| emp_no:integer | languages:integer | gender:keyword| f:keyword |max_lang:integer| y:keyword | l:integer |first_name:keyword | ||
| 10001 |2 |M |Georgi |2 |M |2 |G | ||
| 10002 |5 |F |Bezalel |5 |F |5 |B | ||
| 10003 |4 |M |Parto |4 |M |4 |P | ||
| 10004 |5 |M |Chirstian |5 |M |5 |C | ||
| 10005 |1 |M |Kyoichi |1 |M |1 |K | ||
| 10006 |3 |F |Anneke |3 |F |3 |A | ||
| 10007 |4 |F |Tzvetan |4 |F |4 |T | ||
| 10008 |2 |M |Saniya |2 |M |2 |S | ||
| 10009 |1 |F |Sumant |1 |F |1 |S | ||
| 10010 |4 |null |Duangkaew |4 |null |4 |D | ||
| ; | ||
|
|
||
| // fails with AssertionError at org.elasticsearch.xpack.esql.plan.logical.Limit.writeTo(Limit.java:70) | ||
| groupByMultipleRenamedColumns_AndSameNameExpressionGroupingOverride-Ignore | ||
|
|
||
| FROM employees | ||
| | KEEP emp_no, languages, gender, first_name | ||
| | RENAME first_name as f | ||
| | INLINESTATS max_lang = MAX(languages) BY y = gender, l = languages, f = left(f, 1) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Subtle semantic question: The implementation assumes that expressions in the […] But there's a different way to understand this, and it's actually useful: I don't think we want the latter to be the semantics here. But that could be useful in case that e.g. the left hand side has a |
||
| | LIMIT 10 | ||
| ; | ||
|
|
||
| emp_no:integer | languages:integer | gender:keyword|max_lang:integer| y:keyword | l:integer |f:keyword | ||
| 10001 |2 |M |2 |M |2 |G | ||
| 10002 |5 |F |5 |F |5 |B | ||
| 10003 |4 |M |4 |M |4 |P | ||
| 10004 |5 |M |5 |M |5 |C | ||
| 10005 |1 |M |1 |M |1 |K | ||
| 10006 |3 |F |3 |F |3 |A | ||
| 10007 |4 |F |4 |F |4 |T | ||
| 10008 |2 |M |2 |M |2 |S | ||
| 10009 |1 |F |1 |F |1 |S | ||
| 10010 |4 |null |4 |null |4 |D | ||
| ; | ||
|
|
||
| twoAggregatesGroupedBy_AField_And_AnExpression | ||
| required_capability: inlinestats_v5 | ||
|
|
||
| FROM employees | ||
| | KEEP emp_no, languages, gender, last_name | ||
| | WHERE gender IS NOT NULL | ||
| | INLINESTATS max_lang = MAX(languages), min_lang = MIN(languages) BY f = left(last_name, 1), gender | ||
| | SORT last_name DESC | ||
| | LIMIT 8 | ||
| ; | ||
|
|
||
| emp_no:integer |languages:integer|last_name:keyword|max_lang:integer|min_lang:integer| f:keyword | gender:keyword | ||
| 10053 |3 |Zschoche |4 |3 |Z |F | ||
| 10083 |1 |Zockler |1 |1 |Z |M | ||
| 10007 |4 |Zielinski |4 |3 |Z |F | ||
| 10097 |3 |Waschkowski |3 |3 |W |M | ||
| 10020 |null |Warwick |3 |3 |W |M | ||
| 10043 |1 |Tzvieli |1 |1 |T |M | ||
| 10049 |5 |Tramer |5 |5 |T |F | ||
| 10028 |null |Tempesti |1 |1 |T |M | ||
| ; | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -34,7 +34,6 @@ protected LogicalPlan rule(InlineJoin plan) { | |
| // check if there's any grouping that uses a reference on the right side | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Only a small cleanup here. |
||
| // if so, look for the source until finding a StubReference | ||
| // then copy those on the left side as well | ||
|
|
||
| LogicalPlan left = plan.left(); | ||
| LogicalPlan right = plan.right(); | ||
|
|
||
|
|
@@ -46,7 +45,6 @@ protected LogicalPlan rule(InlineJoin plan) { | |
| // first checks any aggregate that declares expressions inside the grouping | ||
| // second that checks any found references to collect their declaration | ||
| right = right.transformDown(p -> { | ||
|
|
||
| if (p instanceof Aggregate aggregate) { | ||
| // collect references | ||
| for (Expression g : aggregate.groupings()) { | ||
|
|
@@ -56,6 +54,10 @@ protected LogicalPlan rule(InlineJoin plan) { | |
| } | ||
| } | ||
|
|
||
| if (groupingRefs.isEmpty()) { | ||
alex-spies marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| return p; | ||
| } | ||
|
|
||
| // find their declaration and remove it | ||
| // TODO: this doesn't take into account aliasing | ||
|
||
| if (p instanceof Eval eval) { | ||
alex-spies marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I just realized that it's probably a good idea to add tests for `INLINESTATS ... BY x = bucket(...)` and `INLINESTATS ... BY x = CATEGORIZE(...)`. I think the latter cannot work because the join key in `BY x = CATEGORIZE(...)` is computed during the aggregation, whereas `INLINESTATS` requires the join key to be present before that.
Cc @jan-elastic, I think we'll have to start out with a limitation where `INLINESTATS` can't use `CATEGORIZE`, at least at first; to enable this, I think we'd somehow have to grab the categorizer from the first phase of the query (which computes the STATS) and make it available to the second phase of the query (which performs the joining with every row we see).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
FWIW, I can live with the fact that the first version of INLINESTATS doesn't work with CATEGORIZE.
Just open a GitHub issue for that and it can be resolved later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I opened #124717