Skip to content

Commit 467bc2a

Browse files
authored
ESQL: Pull OrderBy followed by InlineJoin on top of it (elastic#137648)
Add an optimisation rule to pull `OrderBy` above `InlineJoin`. This is required since otherwise the `OrderBy` won't be moved out of the left-hand side of the join, ending up as a stand-alone node that can't be turned into an executable -- it'll just be rejected by the verification as an "unbounded sort". Since the `InlineJoin` is sort agnostic, the `OrderBy` can be moved upwards past it, ending up as a top `TopN`. This doesn't entirely solve the issue of unbounded sorts, however: some nodes, such as `MV_EXPAND` or `LOOKUP JOIN`, break the inbound sort, so pulling a `SORT` on top of them isn't possible. In these cases the query will still fail. This is not `INLINE JOIN` specific, though. This is a revival of elastic#132417, with a new approach to handling attributes that are used by `SORT` but are dropped or shadowed by the time `INLINE JOIN` is reached. In this case, we'll add temporary internal attributes that are kept until the `INLINE JOIN` and dropped afterwards. Related elastic#124715 (elastic#113727) Related elastic#124721 (elastic#133120)
1 parent 8e7e97f commit 467bc2a

File tree

19 files changed

+1620
-20
lines changed

19 files changed

+1620
-20
lines changed

docs/changelog/137648.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 137648
2+
summary: Pull `OrderBy` followed by `InlineJoin` on top of it
3+
area: ES|QL
4+
type: enhancement
5+
issues: []

x-pack/plugin/esql/qa/testFixtures/src/main/resources/inlinestats.csv-spec

Lines changed: 334 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -399,24 +399,22 @@ emp_no:integer |avg_worked_seconds:long|avg_avg_worked_seconds:double|languages:
399399
10023 |330870342 |3.181719481E8 |null |5 |1 |5 |F
400400
;
401401

402-
// TODO: INLINE STATS unit test needed for this one
403-
// https://github.com/elastic/elasticsearch/issues/113727
404-
pushDownSort_To_LeftSideOnly-Ignore
405-
required_capability: inline_stats
402+
pushDownSort_To_LeftSideOnly
403+
required_capability: inline_stats_preceeded_by_sort
406404

407405
from employees
408406
| sort emp_no
409-
| inline stats avg = avg(salary) by languages
407+
| inline stats avg = round(avg(salary)) by languages
410408
| limit 5
411409
| keep emp_no, avg, languages, gender
412410
;
413411

414-
emp_no:integer| avg:double |languages:integer|gender:keyword
415-
10001 |57305.0 |2 |M
416-
10002 |46272.5 |5 |F
417-
10003 |61805.0 |4 |M
418-
10004 |46272.5 |5 |M
419-
10005 |63528.0 |1 |M
412+
emp_no:integer| avg:double |languages:integer|gender:keyword
413+
10001 |48179.0 |2 |M
414+
10002 |41681.0 |5 |F
415+
10003 |47733.0 |4 |M
416+
10004 |41681.0 |5 |M
417+
10005 |50577.0 |1 |M
420418
;
421419

422420
byMultivaluedSimple
@@ -4416,6 +4414,331 @@ c:long
44164414
1
44174415
;
44184416

4417+
sortBeforeInlinestats1
4418+
required_capability: inline_stats_preceeded_by_sort
4419+
4420+
ROW salary = 12300, emp_no = 5, gender = "F"
4421+
| EVAL salaryK = salary/1000
4422+
| SORT salaryK DESC
4423+
| INLINE STATS sum = SUM(salaryK) BY gender
4424+
| KEEP emp_no
4425+
;
4426+
4427+
emp_no:integer
4428+
5
4429+
;
4430+
4431+
sortBeforeInlinestats2
4432+
required_capability: inline_stats_preceeded_by_sort
4433+
4434+
FROM employees
4435+
| SORT emp_no
4436+
| EVAL salaryK = salary/1000
4437+
| INLINE STATS count = COUNT(*) BY salaryK
4438+
| KEEP emp_no, still_hired, count
4439+
| LIMIT 5
4440+
;
4441+
4442+
emp_no:integer |still_hired:boolean|count:long
4443+
10001 |true |1
4444+
10002 |true |3
4445+
10003 |false |2
4446+
10004 |true |2
4447+
10005 |true |1
4448+
;
4449+
4450+
// TODO: fails with: java.lang.AssertionError: expected no concrete indices without data node plan
4451+
sortBeforeInlinestats3-Ignore
4452+
required_capability: inline_stats_preceeded_by_sort
4453+
4454+
FROM employees
4455+
| SORT languages DESC
4456+
| EVAL salaryK = salary/1000
4457+
| INLINE STATS count = COUNT(*) BY salaryK
4458+
| SORT emp_no
4459+
| INLINE STATS min = MIN(MV_COUNT(languages)) BY salaryK
4460+
| KEEP emp_no, still_hired, count
4461+
| LIMIT 5
4462+
;
4463+
4464+
emp_no:integer |still_hired:boolean|count:long
4465+
10001 |true |1
4466+
10002 |true |3
4467+
10003 |false |2
4468+
10004 |true |2
4469+
10005 |true |1
4470+
;
4471+
4472+
// same as `afterLookup`, swapped SORT position
4473+
sortBeforeInlinestatsAndLookupJoin
4474+
required_capability: inline_stats_preceeded_by_sort
4475+
required_capability: join_lookup_v12
4476+
4477+
FROM airports
4478+
| EVAL backup_scalerank = scalerank
4479+
| RENAME scalerank AS language_code
4480+
// | SORT abbrev DESC // this would work here, but not in a FORK config (GenerativeForkIT), which disables column pruning
4481+
| LOOKUP JOIN languages_lookup ON language_code
4482+
| RENAME language_name as scalerank
4483+
| DROP language_code
4484+
| INLINE STATS count=COUNT(*) BY scalerank
4485+
| SORT abbrev DESC
4486+
| KEEP abbrev, *scalerank
4487+
| LIMIT 5
4488+
;
4489+
4490+
abbrev:keyword |backup_scalerank:integer| scalerank:keyword
4491+
null |8 |null
4492+
null |8 |null
4493+
null |8 |null
4494+
ZRH |3 |Spanish
4495+
ZNZ |4 |German
4496+
;
4497+
4498+
// same as `shadowingAggregateByNextGrouping`, swapped SORT position
4499+
sortBeforeDoubleInlinestats
4500+
required_capability: inline_stats_preceeded_by_sort
4501+
4502+
FROM employees
4503+
| KEEP gender, languages, emp_no, salary
4504+
| SORT emp_no
4505+
| INLINE STATS gender = count_distinct(gender) BY languages
4506+
| INLINE STATS avg(salary) BY gender
4507+
| LIMIT 3
4508+
;
4509+
4510+
emp_no:integer |salary:integer |languages:integer|avg(salary):double|gender:long
4511+
10001 |57305 |2 |48248.55 |2
4512+
10002 |56371 |5 |48248.55 |2
4513+
10003 |61805 |4 |48248.55 |2
4514+
;
4515+
4516+
inlineStatsAfterSortDoubled
4517+
required_capability: inline_stats_preceeded_by_sort
4518+
4519+
FROM employees
4520+
| SORT emp_no DESC // going to be dropped
4521+
| INLINE STATS min = MIN(salary) BY languages
4522+
| SORT salary ASC
4523+
| INLINE STATS avg = AVG(salary) BY emp_no
4524+
| WHERE emp_no > 10050
4525+
| KEEP emp_no, avg, salary
4526+
| LIMIT 5
4527+
;
4528+
4529+
emp_no:integer |avg:double |salary:integer
4530+
10092 |25976.0 |25976
4531+
10057 |27215.0 |27215
4532+
10084 |28035.0 |28035
4533+
10068 |28941.0 |28941
4534+
10060 |29175.0 |29175
4535+
;
4536+
4537+
inlineStatsAfterSortShadowed
4538+
required_capability: inline_stats_preceeded_by_sort
4539+
4540+
FROM employees
4541+
| EVAL s1 = salary + 1
4542+
| SORT s1
4543+
| WHERE s1 > 50000
4544+
| EVAL ls = languages::string
4545+
| EVAL s1 = salary // s1 is shadowed, not dropped
4546+
| INLINE STATS cd = COUNT_DISTINCT(ls)
4547+
| WHERE emp_no < 10006
4548+
| KEEP emp_no, ls, cd, s1
4549+
;
4550+
4551+
emp_no:integer |ls:keyword |cd:long |s1:integer
4552+
10002 |5 |5 |56371
4553+
10001 |2 |5 |57305
4554+
10003 |4 |5 |61805
4555+
10005 |1 |5 |63528
4556+
;
4557+
4558+
inlineStatsAfterSortDropped
4559+
required_capability: inline_stats_preceeded_by_sort
4560+
4561+
FROM employees
4562+
| EVAL s1 = salary + 1
4563+
| SORT s1
4564+
| WHERE s1 > 72000
4565+
| DROP s1
4566+
| INLINE STATS cd = COUNT_DISTINCT(languages)
4567+
| KEEP emp_no, cd, languages, salary
4568+
;
4569+
4570+
emp_no:integer |cd:long |languages:integer|salary:integer
4571+
10099 |4 |2 |73578
4572+
10019 |4 |1 |73717
4573+
10027 |4 |null |73851
4574+
10007 |4 |4 |74572
4575+
10045 |4 |3 |74970
4576+
10029 |4 |null |74999
4577+
;
4578+
4579+
mixedShadowingInlineStatsAfterSort
4580+
required_capability: inline_stats_preceeded_by_sort
4581+
4582+
FROM employees
4583+
| KEEP salary, emp_no, first_name
4584+
| SORT salary, emp_no
4585+
| INLINE STATS salary = COUNT(*) BY emp_no
4586+
| LIMIT 5
4587+
;
4588+
4589+
first_name:keyword|salary:long |emp_no:integer
4590+
Guoxiang |1 |10015
4591+
null |1 |10035
4592+
Valdiodio |1 |10092
4593+
Florian |1 |10048
4594+
Ebbe |1 |10057
4595+
;
4596+
4597+
inlineStatsWithAggExpressionAfterSortAndRename
4598+
required_capability: inline_stats_preceeded_by_sort
4599+
4600+
FROM employees
4601+
| KEEP emp_no, salary, languages
4602+
| SORT emp_no
4603+
| RENAME emp_no AS emp_idx
4604+
| INLINE STATS sum = COUNT(salary) + COUNT(salary) BY languages
4605+
| LIMIT 5
4606+
;
4607+
4608+
emp_idx:integer|salary:integer |sum:long |languages:integer
4609+
10001 |57305 |38 |2
4610+
10002 |56371 |42 |5
4611+
10003 |61805 |36 |4
4612+
10004 |36174 |42 |5
4613+
10005 |63528 |30 |1
4614+
;
4615+
4616+
inlineStatsAfterEnrichAndSort
4617+
required_capability: inline_stats_preceeded_by_sort
4618+
required_capability: enrich_load
4619+
4620+
FROM employees
4621+
| KEEP emp_no, first_name, salary, languages
4622+
| EVAL language_name = first_name // the key point of this test: sort by a field that ENRICH subsequently overwrites
4623+
| SORT language_name
4624+
| EVAL language_code = languages::KEYWORD
4625+
| ENRICH languages_policy
4626+
| RENAME salary AS sal
4627+
| INLINE STATS cd = COUNT(emp_no) BY languages
4628+
| LIMIT 7
4629+
;
4630+
4631+
emp_no:integer |first_name:keyword|sal:integer |language_code:keyword | language_name:keyword |cd:long |languages:integer
4632+
10059 |Alejandro |44307 |2 |French |19 |2
4633+
10091 |Amabile |38645 |3 |Spanish |17 |3
4634+
10006 |Anneke |60335 |3 |Spanish |17 |3
4635+
10062 |Anoosh |65030 |3 |Spanish |17 |3
4636+
10094 |Arumugam |66817 |5 |null |21 |5
4637+
10049 |Basil |37853 |5 |null |21 |5
4638+
10058 |Berhard |38376 |3 |Spanish |17 |3
4639+
;
4640+
4641+
shadowingInlineStatsAfterSortDropped
4642+
required_capability: inline_stats_preceeded_by_sort
4643+
4644+
FROM employees
4645+
| KEEP salary, emp_no, first_name, gender
4646+
| EVAL old_salary = salary
4647+
| SORT old_salary, emp_no
4648+
| DROP old_salary
4649+
| INLINE STATS salary = COUNT(*) BY gender
4650+
| LIMIT 5
4651+
;
4652+
4653+
emp_no:integer |first_name:keyword |salary:long |gender:keyword
4654+
10015 |Guoxiang |10 |null
4655+
10035 |null |57 |M
4656+
10092 |Valdiodio |33 |F
4657+
10048 |Florian |57 |M
4658+
10057 |Ebbe |33 |F
4659+
;
4660+
4661+
inlineStatsAfterSortDroppedAndResort
4662+
required_capability: inline_stats_preceeded_by_sort
4663+
4664+
FROM employees
4665+
| KEEP salary, emp_no, first_name, gender
4666+
| EVAL old_salary = salary
4667+
| SORT old_salary, emp_no
4668+
| DROP old_salary
4669+
| EVAL salary = salary / 1000
4670+
| RENAME salary AS s
4671+
| SORT s, first_name
4672+
| INLINE STATS s = COUNT(*) BY gender
4673+
| LIMIT 5
4674+
;
4675+
4676+
emp_no:integer |first_name:keyword |s:long |gender:keyword
4677+
10015 |Guoxiang |10 |null
4678+
10092 |Valdiodio |33 |F
4679+
10035 |null |57 |M
4680+
10048 |Florian |57 |M
4681+
10057 |Ebbe |33 |F
4682+
;
4683+
4684+
inlineStatsDoubledAfterSortDropped
4685+
required_capability: inline_stats_preceeded_by_sort
4686+
4687+
FROM employees
4688+
| EVAL s1 = salary + 1
4689+
| SORT s1
4690+
| WHERE s1 > 72000
4691+
| DROP s1
4692+
| INLINE STATS sum = SUM(languages)
4693+
| INLINE STATS cd = COUNT_DISTINCT(languages)
4694+
| KEEP emp_no, cd, languages, salary, sum
4695+
;
4696+
4697+
emp_no:integer |cd:long |languages:integer|salary:integer |sum:long
4698+
10099 |4 |2 |73578 |10
4699+
10019 |4 |1 |73717 |10
4700+
10027 |4 |null |73851 |10
4701+
10007 |4 |4 |74572 |10
4702+
10045 |4 |3 |74970 |10
4703+
10029 |4 |null |74999 |10
4704+
;
4705+
4706+
inlineStatsAfterStats
4707+
required_capability: inline_stats_preceeded_by_sort
4708+
4709+
FROM employees
4710+
| KEEP salary, emp_no, first_name, gender
4711+
| SORT salary
4712+
| STATS salary = MAX(salary) BY gender
4713+
| INLINE STATS s = COUNT(*) BY gender
4714+
| LIMIT 5
4715+
| SORT salary
4716+
;
4717+
4718+
salary:integer |s:long |gender:keyword
4719+
73717 |1 |null
4720+
74572 |1 |F
4721+
74999 |1 |M
4722+
;
4723+
4724+
inlineStatsAfterStatsAndSort
4725+
required_capability: inline_stats_preceeded_by_sort
4726+
4727+
FROM employees
4728+
| KEEP salary, emp_no, first_name, gender
4729+
| SORT salary
4730+
| STATS salary = MAX(salary) BY gender
4731+
| SORT salary DESC
4732+
| INLINE STATS s = COUNT(*) BY gender
4733+
| LIMIT 5
4734+
;
4735+
4736+
salary:integer |s:long |gender:keyword
4737+
74999 |1 |M
4738+
74572 |1 |F
4739+
73717 |1 |null
4740+
;
4741+
44194742
fixClassCastBugWithSeveralCounts
44204743
required_capability: inline_stats
44214744
required_capability: fix_stats_classcast_exception

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -983,6 +983,11 @@ public enum Cap {
983983
*/
984984
INLINE_STATS,
985985

986+
/**
987+
* Added support for having INLINE STATS preceded by a SORT clause, now executable in certain cases.
988+
*/
989+
INLINE_STATS_PRECEEDED_BY_SORT,
990+
986991
/**
987992
* Support partial_results
988993
*/

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import org.elasticsearch.xpack.esql.optimizer.rules.logical.DeduplicateAggs;
2323
import org.elasticsearch.xpack.esql.optimizer.rules.logical.ExtractAggregateCommonFilter;
2424
import org.elasticsearch.xpack.esql.optimizer.rules.logical.FoldNull;
25+
import org.elasticsearch.xpack.esql.optimizer.rules.logical.HoistOrderByBeforeInlineJoin;
2526
import org.elasticsearch.xpack.esql.optimizer.rules.logical.HoistRemoteEnrichLimit;
2627
import org.elasticsearch.xpack.esql.optimizer.rules.logical.HoistRemoteEnrichTopN;
2728
import org.elasticsearch.xpack.esql.optimizer.rules.logical.LiteralsOnTheRight;
@@ -225,6 +226,7 @@ protected static Batch<LogicalPlan> operators() {
225226
new PruneRedundantSortClauses(),
226227
new PruneLeftJoinOnNullMatchingField(),
227228
new PruneInlineJoinOnEmptyRightSide(),
229+
new HoistOrderByBeforeInlineJoin(),
228230
new PruneEmptyAggregates()
229231
);
230232
}

0 commit comments

Comments
 (0)