Skip to content

Commit 45ebb58

Browse files
authored
ESQL: Push filters past inline stats (elastic#137572)
1 parent 7e73dc4 commit 45ebb58

File tree

5 files changed

+1744
-31
lines changed

5 files changed

+1744
-31
lines changed

docs/changelog/137572.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 137572
2+
summary: Push filters past inline stats
3+
area: ES|QL
4+
type: enhancement
5+
issues: []

x-pack/plugin/esql/qa/testFixtures/src/main/resources/inlinestats.csv-spec

Lines changed: 285 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4409,3 +4409,288 @@ count1:long |count2:long
44094409
14 |14
44104410
14 |14
44114411
;
4412+
4413+
pushDown_OneGroupingFilter_PastInlineJoin
4414+
required_capability: inline_stats
4415+
4416+
FROM employees
4417+
| INLINE STATS avg = ROUND(AVG(salary)) BY languages
4418+
| WHERE languages > 2
4419+
| KEEP avg, languages, salary, emp_no
4420+
| SORT emp_no
4421+
| LIMIT 10
4422+
;
4423+
4424+
avg:d | languages:i | salary:i | emp_no:i
4425+
41681.0 |5 |56371 |10002
4426+
47733.0 |4 |61805 |10003
4427+
41681.0 |5 |36174 |10004
4428+
52419.0 |3 |60335 |10006
4429+
47733.0 |4 |74572 |10007
4430+
47733.0 |4 |45797 |10010
4431+
41681.0 |5 |31120 |10011
4432+
41681.0 |5 |48942 |10012
4433+
41681.0 |5 |37137 |10014
4434+
41681.0 |5 |25324 |10015
4435+
;
4436+
4437+
pushDown_SelectiveGroupingAndFilters_PastInlineJoin
4438+
required_capability: inline_stats
4439+
4440+
FROM employees
4441+
| INLINE STATS avg = ROUND(AVG(salary), 1) BY languages, gender
4442+
| WHERE languages > 2 AND gender IS NOT NULL AND emp_no > 10050
4443+
| KEEP avg, languages, gender, emp_no
4444+
| SORT emp_no
4445+
| LIMIT 10
4446+
;
4447+
4448+
avg:d | languages:i | gender:s | emp_no:i
4449+
51741.9 |3 |M |10051
4450+
53660.0 |3 |F |10053
4451+
47058.9 |4 |M |10054
4452+
39052.9 |5 |M |10055
4453+
49291.5 |4 |F |10057
4454+
51741.9 |3 |M |10058
4455+
51741.9 |3 |M |10062
4456+
53660.0 |3 |F |10063
4457+
39052.9 |5 |M |10064
4458+
39052.9 |5 |M |10066
4459+
;
4460+
4461+
pushDown_SelectiveGroupingOrFilters_PastInlineJoin
4462+
required_capability: inline_stats
4463+
4464+
FROM employees
4465+
| INLINE STATS avg = AVG(salary) BY languages, gender
4466+
| WHERE languages > 2 AND (gender IS NOT NULL OR emp_no > 10050)
4467+
| KEEP avg, languages, gender, emp_no
4468+
| SORT emp_no
4469+
| LIMIT 10
4470+
;
4471+
4472+
avg:d | languages:i | gender:s | emp_no:i
4473+
46705.555555555555|5 |F |10002
4474+
47058.90909090909 |4 |M |10003
4475+
39052.875 |5 |M |10004
4476+
53660.0 |3 |F |10006
4477+
49291.5 |4 |F |10007
4478+
51741.90909090909 |3 |M |10030
4479+
47058.90909090909 |4 |M |10031
4480+
53660.0 |3 |F |10032
4481+
39052.875 |5 |M |10035
4482+
47058.90909090909 |4 |M |10036
4483+
;
4484+
4485+
pushDown_ComplexFiltering_CombinedWithLeftFiltering_PastInlineJoin
4486+
required_capability: inline_stats
4487+
4488+
FROM employees
4489+
| WHERE emp_no < 10075 AND salary < 60000
4490+
| INLINE STATS avg = AVG(salary) BY languages, gender
4491+
| WHERE languages > 2 AND (gender IS NOT NULL OR emp_no > 10050) AND salary > 50000
4492+
| KEEP avg, languages, gender, emp_no, salary
4493+
| SORT emp_no
4494+
;
4495+
4496+
avg:d | languages:i | gender:s | emp_no:i | salary:i
4497+
45933.4 |5 |F |10002 |56371
4498+
39657.2 |4 |M |10046 |50064
4499+
41240.6 |3 |M |10051 |58121
4500+
45662.333333333336|3 |F |10053 |54462
4501+
45662.333333333336|3 |F |10063 |52121
4502+
41240.6 |3 |M |10070 |54329
4503+
45933.4 |5 |F |10072 |54518
4504+
;
4505+
4506+
pushDown_ImpossibleFilter_PastInlineJoin
4507+
required_capability: inline_stats
4508+
4509+
FROM employees
4510+
| WHERE salary < 10000
4511+
| INLINE STATS salary = AVG(salary) BY salary
4512+
| WHERE salary > 10000
4513+
| KEEP salary, emp_no
4514+
;
4515+
warning:Line 3:16: Field 'salary' shadowed by field at line 3:40
4516+
4517+
salary:i | emp_no:i
4518+
;
4519+
4520+
dontPushDown_A_SimpleFilter_PastInlineJoin
4521+
required_capability: inline_stats
4522+
4523+
FROM employees
4524+
| INLINE STATS a = ROUND(AVG(salary), 2) BY gender
4525+
| WHERE languages > 2
4526+
| KEEP languages, a, gender, emp_no
4527+
| SORT emp_no
4528+
| LIMIT 10
4529+
;
4530+
4531+
languages:i | a:d | gender:s | emp_no:i
4532+
5 |50490.79 |F |10002
4533+
4 |46860.6 |M |10003
4534+
5 |46860.6 |M |10004
4535+
3 |50490.79 |F |10006
4536+
4 |50490.79 |F |10007
4537+
4 |48760.5 |null |10010
4538+
5 |48760.5 |null |10011
4539+
5 |48760.5 |null |10012
4540+
5 |48760.5 |null |10014
4541+
5 |48760.5 |null |10015
4542+
;
4543+
4544+
partiallyPushDown_GroupingFilters_PastTwoInlineJoins1
4545+
required_capability: inline_stats
4546+
4547+
FROM employees
4548+
| INLINE STATS avgByL = ROUND(AVG(salary)) BY languages
4549+
| INLINE STATS avgByG = ROUND(AVG(salary)) BY gender
4550+
| WHERE languages > 2 AND gender IS NOT NULL
4551+
| KEEP avgB*, languages, gender, emp_no
4552+
| SORT emp_no
4553+
| LIMIT 10
4554+
;
4555+
4556+
avgByL:d | avgByG:d | languages:i | gender:s | emp_no:i
4557+
41681.0 |50491.0 |5 |F |10002
4558+
47733.0 |46861.0 |4 |M |10003
4559+
41681.0 |46861.0 |5 |M |10004
4560+
52419.0 |50491.0 |3 |F |10006
4561+
47733.0 |50491.0 |4 |F |10007
4562+
52419.0 |46861.0 |3 |M |10030
4563+
47733.0 |46861.0 |4 |M |10031
4564+
52419.0 |50491.0 |3 |F |10032
4565+
41681.0 |46861.0 |5 |M |10035
4566+
47733.0 |46861.0 |4 |M |10036
4567+
;
4568+
4569+
partiallyPushDown_GroupingFilters_PastTwoInlineJoins2
4570+
required_capability: inline_stats
4571+
4572+
FROM employees
4573+
| INLINE STATS avgByL = ROUND(AVG(salary)) BY languages
4574+
| INLINE STATS avgByG = ROUND(AVG(salary)) BY gender, languages
4575+
| WHERE languages > 3 AND gender IS NOT NULL AND emp_no > 10050
4576+
| KEEP avgB*, languages, gender, emp_no
4577+
| SORT emp_no
4578+
;
4579+
4580+
avgByL:d | avgByG:d | languages:i | gender:s | emp_no:i
4581+
47733.0 |47059.0 |4 |M |10054
4582+
41681.0 |39053.0 |5 |M |10055
4583+
47733.0 |49292.0 |4 |F |10057
4584+
41681.0 |39053.0 |5 |M |10064
4585+
41681.0 |39053.0 |5 |M |10066
4586+
41681.0 |46706.0 |5 |F |10069
4587+
41681.0 |46706.0 |5 |F |10072
4588+
47733.0 |47059.0 |4 |M |10073
4589+
41681.0 |46706.0 |5 |F |10074
4590+
41681.0 |46706.0 |5 |F |10075
4591+
41681.0 |39053.0 |5 |M |10077
4592+
41681.0 |39053.0 |5 |M |10080
4593+
47733.0 |47059.0 |4 |M |10082
4594+
41681.0 |39053.0 |5 |M |10085
4595+
41681.0 |46706.0 |5 |F |10087
4596+
41681.0 |46706.0 |5 |F |10088
4597+
47733.0 |49292.0 |4 |F |10089
4598+
41681.0 |46706.0 |5 |F |10094
4599+
47733.0 |47059.0 |4 |M |10095
4600+
47733.0 |47059.0 |4 |M |10096
4601+
47733.0 |49292.0 |4 |F |10098
4602+
47733.0 |49292.0 |4 |F |10100
4603+
;
4604+
4605+
partiallyPushDown_GroupingFilters_PastTwoInlineJoins_ExcludeAggFilters
4606+
required_capability: inline_stats
4607+
4608+
FROM employees
4609+
| INLINE STATS avgByL = ROUND(AVG(salary)) BY languages
4610+
| INLINE STATS avgByG = ROUND(AVG(salary)) BY gender, languages
4611+
| WHERE languages > 3 AND gender IS NOT NULL AND avgByL > 40000
4612+
| KEEP avgB*, languages, gender, emp_no
4613+
| SORT emp_no
4614+
| LIMIT 10
4615+
;
4616+
4617+
avgByL:d | avgByG:d | languages:i | gender:s | emp_no:i
4618+
41681.0 |46706.0 |5 |F |10002
4619+
47733.0 |47059.0 |4 |M |10003
4620+
41681.0 |39053.0 |5 |M |10004
4621+
47733.0 |49292.0 |4 |F |10007
4622+
47733.0 |47059.0 |4 |M |10031
4623+
41681.0 |39053.0 |5 |M |10035
4624+
47733.0 |47059.0 |4 |M |10036
4625+
47733.0 |47059.0 |4 |M |10038
4626+
47733.0 |49292.0 |4 |F |10040
4627+
47733.0 |47059.0 |4 |M |10046
4628+
;
4629+
4630+
partiallyPushDown_RenamedGroupingFilters_PastTwoInlineJoins
4631+
required_capability: inline_stats
4632+
4633+
FROM employees
4634+
| INLINE STATS avgByL = ROUND(AVG(salary)) BY languages
4635+
| RENAME languages AS lang
4636+
| INLINE STATS avgByG = ROUND(AVG(salary)) BY gender, lang
4637+
| WHERE lang > 3 AND gender IS NOT NULL AND emp_no > 10050
4638+
| KEEP avgB*, lang, gender, emp_no
4639+
| SORT emp_no
4640+
| LIMIT 10
4641+
;
4642+
4643+
avgByL:d | avgByG:d | lang:i | gender:s | emp_no:i
4644+
47733.0 |47059.0 |4 |M |10054
4645+
41681.0 |39053.0 |5 |M |10055
4646+
47733.0 |49292.0 |4 |F |10057
4647+
41681.0 |39053.0 |5 |M |10064
4648+
41681.0 |39053.0 |5 |M |10066
4649+
41681.0 |46706.0 |5 |F |10069
4650+
41681.0 |46706.0 |5 |F |10072
4651+
47733.0 |47059.0 |4 |M |10073
4652+
41681.0 |46706.0 |5 |F |10074
4653+
41681.0 |46706.0 |5 |F |10075
4654+
;
4655+
4656+
pushDown_OneGroupingFilter_PastInlineJoinWithInnerFilter
4657+
required_capability: inline_stats
4658+
4659+
FROM employees
4660+
| INLINE STATS avg = AVG(salary) WHERE salary > 50000 BY languages
4661+
| WHERE languages > 2
4662+
| KEEP avg, languages, salary
4663+
| SORT salary
4664+
| LIMIT 10
4665+
;
4666+
4667+
avg:d | languages:i | salary:i
4668+
56499.0 |5 |25324
4669+
56499.0 |5 |25945
4670+
62060.27272727273 |3 |26436
4671+
63503.333333333336|4 |27215
4672+
62060.27272727273 |3 |28941
4673+
62060.27272727273 |3 |30404
4674+
56499.0 |5 |31120
4675+
56499.0 |5 |31897
4676+
56499.0 |5 |32272
4677+
63503.333333333336|4 |32568
4678+
;
4679+
4680+
pushDown_OneGroupingFilter_PastInlineJoinWithInnerFilterAndOuterFilter
4681+
required_capability: inline_stats
4682+
4683+
FROM employees
4684+
| INLINE STATS avg = AVG(salary) WHERE salary > 50000 BY languages
4685+
| WHERE languages > 2 AND salary < 30000
4686+
| KEEP avg, languages, salary
4687+
| SORT salary
4688+
;
4689+
4690+
avg:d | languages:i | salary:i
4691+
56499.0 |5 |25324
4692+
56499.0 |5 |25945
4693+
62060.27272727273 |3 |26436
4694+
63503.333333333336|4 |27215
4695+
62060.27272727273 |3 |28941
4696+
;

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PushDownAndCombineFilters.java

Lines changed: 42 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -104,10 +104,7 @@ protected LogicalPlan rule(Filter filter, LogicalOptimizerContext ctx) {
104104
} else if (child instanceof OrderBy orderBy) {
105105
// swap the filter with its child
106106
plan = orderBy.replaceChild(filter.with(orderBy.child(), condition));
107-
} else if (child instanceof Join join && child instanceof InlineJoin == false) {
108-
// TODO: could we do better here about pushing down filters for inline stats?
109-
// See also https://github.com/elastic/elasticsearch/issues/127497
110-
// Push down past INLINE STATS if the condition is on the groupings
107+
} else if (child instanceof Join join) {
111108
return pushDownPastJoin(filter, join, ctx.foldCtx());
112109
}
113110
// cannot push past a Limit, this could change the tailing result set returned
@@ -135,19 +132,51 @@ private static ScopedFilter scopeFilter(List<Expression> filters, LogicalPlan le
135132
return new ScopedFilter(rest, leftFilters, rightFilters);
136133
}
137134

135+
// split the filter condition in 3 parts:
136+
// 1. filters that reference attributes from the right side only
137+
// 2. filters that reference attributes from both sides
138+
// 3. filters that reference attributes from the left side only
139+
private static ScopedFilter scopeInlineStatsFilter(List<Expression> filters, InlineJoin ij) {
140+
List<Expression> rightFilters = new ArrayList<>();
141+
List<Expression> bothSides = new ArrayList<>();
142+
List<Expression> leftFilters = new ArrayList<>(filters);
143+
144+
AttributeSet leftOutputSet = ij.left().outputSet();
145+
AttributeSet rightOutputSet = ij.right().outputSet();
146+
AttributeSet rightOutputSetWithoutKeys = rightOutputSet.subtract(AttributeSet.of(ij.config().rightFields()));
147+
148+
leftFilters.removeIf(f -> {
149+
if (f.references().subsetOf(rightOutputSet)) {
150+
if (f.references().subsetOf(leftOutputSet)) {
151+
bothSides.add(f);
152+
return true;
153+
} else if (f.references().subsetOf(rightOutputSetWithoutKeys)) {
154+
rightFilters.add(f);
155+
return true;
156+
}
157+
}
158+
return false;
159+
});
160+
return new ScopedFilter(leftFilters, bothSides, rightFilters);
161+
}
162+
138163
private static LogicalPlan pushDownPastJoin(Filter filter, Join join, FoldContext foldCtx) {
139164
LogicalPlan plan = filter;
140165
// pushdown only through LEFT joins
141166
// TODO: generalize this for other join types
142167
if (join.config().type() == JoinTypes.LEFT) {
143168
LogicalPlan left = join.left();
144169
LogicalPlan right = join.right();
170+
var conjunctions = Predicates.splitAnd(filter.condition());
171+
172+
// Split the filter condition in 3 parts.
173+
// For InlineJoin we use a scoping that allows pushing down filters either to right side only or to both sides.
174+
// For the rest of the joins we use the standard scoping:
175+
// - filters scoped to the left
176+
// - filters scoped to the right
177+
// - filter that requires both sides to be evaluated
178+
var scoped = join instanceof InlineJoin ij ? scopeInlineStatsFilter(conjunctions, ij) : scopeFilter(conjunctions, left, right);
145179

146-
// split the filter condition in 3 parts:
147-
// 1. filter scoped to the left
148-
// 2. filter scoped to the right
149-
// 3. filter that requires both sides to be evaluated
150-
ScopedFilter scoped = scopeFilter(Predicates.splitAnd(filter.condition()), left, right);
151180
boolean optimizationApplied = false;
152181
// push the left scoped filter down to the left child
153182
if (scoped.leftFilters.size() > 0) {
@@ -156,15 +185,15 @@ private static LogicalPlan pushDownPastJoin(Filter filter, Join join, FoldContex
156185
// update the join with the new left child
157186
join = (Join) join.replaceLeft(left);
158187
// we completely applied the left filters, so we can remove them from the scoped filters
159-
scoped = new ScopedFilter(scoped.commonFilters(), List.of(), scoped.rightFilters);
188+
scoped = new ScopedFilter(scoped.commonFilters, List.of(), scoped.rightFilters);
160189
optimizationApplied = true;
161190
}
162191
// push the right scoped filter down to the right child
163192
// We check if each AND component of the filter is already part of the right side filter before we add it
164193
// In the future, this optimization can apply to other types of joins as well such as InlineJoin
165194
// but for now we limit it to LEFT joins only, till filters are supported for other join types
166-
if (scoped.rightFilters().isEmpty() == false) {
167-
List<Expression> rightPushableFilters = buildRightPushableFilters(scoped.rightFilters(), foldCtx);
195+
if (scoped.rightFilters.isEmpty() == false && join instanceof InlineJoin == false) {
196+
List<Expression> rightPushableFilters = buildRightPushableFilters(scoped.rightFilters, foldCtx);
168197
if (rightPushableFilters.isEmpty() == false) {
169198
if (join.right() instanceof Filter existingRightFilter) {
170199
// merge the unique AND filter components from rightPushableFilters and existingRightFilter.condition()
@@ -210,7 +239,7 @@ private static LogicalPlan pushDownPastJoin(Filter filter, Join join, FoldContex
210239
}
211240
if (optimizationApplied) {
212241
// if we pushed down some filters, we need to update the filters to reapply above the join
213-
Expression remainingFilter = Predicates.combineAnd(CollectionUtils.combine(scoped.commonFilters, scoped.rightFilters));
242+
Expression remainingFilter = Predicates.combineAnd(CollectionUtils.combine(scoped.commonFilters(), scoped.rightFilters));
214243
plan = remainingFilter != null ? filter.with(join, remainingFilter) : join;
215244
}
216245
}

0 commit comments

Comments
 (0)