Skip to content

Commit 98d5335

Browse files
authored
ESQL: Disable pushdown of WHERE past STATS (#115308)
Fix #115281 Let's disable the faulty optimization for now and re-introduce it later, correctly.
1 parent 43a6b35 commit 98d5335

File tree

6 files changed

+67
-29
lines changed

6 files changed

+67
-29
lines changed

docs/changelog/115308.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 115308
2+
summary: "ESQL: Disable pushdown of WHERE past STATS"
3+
area: ES|QL
4+
type: bug
5+
issues:
6+
- 115281

x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2291,6 +2291,33 @@ m:integer |a:double |x:integer
22912291
74999 |48249.0 |0
22922292
;
22932293

2294+
statsWithFilterOnGroups
2295+
required_capability: fix_filter_pushdown_past_stats
2296+
FROM employees
2297+
| STATS v = VALUES(emp_no) by job_positions | WHERE job_positions == "Accountant" | MV_EXPAND v | SORT v
2298+
;
2299+
2300+
v:integer | job_positions:keyword
2301+
10001 | Accountant
2302+
10012 | Accountant
2303+
10016 | Accountant
2304+
10023 | Accountant
2305+
10025 | Accountant
2306+
10028 | Accountant
2307+
10034 | Accountant
2308+
10037 | Accountant
2309+
10044 | Accountant
2310+
10045 | Accountant
2311+
10050 | Accountant
2312+
10051 | Accountant
2313+
10066 | Accountant
2314+
10081 | Accountant
2315+
10085 | Accountant
2316+
10089 | Accountant
2317+
10092 | Accountant
2318+
10094 | Accountant
2319+
;
2320+
22942321

22952322
statsWithFiltering
22962323
required_capability: per_agg_filtering

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -411,7 +411,12 @@ public enum Cap {
411411
/**
412412
* Support for semantic_text field mapping
413413
*/
414-
SEMANTIC_TEXT_TYPE(EsqlCorePlugin.SEMANTIC_TEXT_FEATURE_FLAG);
414+
SEMANTIC_TEXT_TYPE(EsqlCorePlugin.SEMANTIC_TEXT_FEATURE_FLAG),
415+
/**
416+
* Fix for an optimization that caused wrong results
417+
* https://github.com/elastic/elasticsearch/issues/115281
418+
*/
419+
FIX_FILTER_PUSHDOWN_PAST_STATS;
415420

416421
private final boolean enabled;
417422

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PushDownAndCombineFilters.java

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,6 @@
1515
import org.elasticsearch.xpack.esql.core.expression.Expressions;
1616
import org.elasticsearch.xpack.esql.core.expression.ReferenceAttribute;
1717
import org.elasticsearch.xpack.esql.core.expression.predicate.Predicates;
18-
import org.elasticsearch.xpack.esql.expression.function.aggregate.AggregateFunction;
19-
import org.elasticsearch.xpack.esql.plan.logical.Aggregate;
2018
import org.elasticsearch.xpack.esql.plan.logical.Enrich;
2119
import org.elasticsearch.xpack.esql.plan.logical.Eval;
2220
import org.elasticsearch.xpack.esql.plan.logical.Filter;
@@ -38,18 +36,13 @@ protected LogicalPlan rule(Filter filter) {
3836
LogicalPlan child = filter.child();
3937
Expression condition = filter.condition();
4038

39+
// TODO: Push down past STATS if the filter is only on the groups; but take into account how `STATS ... BY field` handles
40+
// multi-values: It seems to be equivalent to `EVAL field = MV_DEDUPE(field) | MV_EXPAND(field) | STATS ... BY field`, where the
41+
// last `STATS ... BY field` can assume that `field` is single-valued (to be checked more thoroughly).
42+
// https://github.com/elastic/elasticsearch/issues/115311
4143
if (child instanceof Filter f) {
4244
// combine nodes into a single Filter with updated ANDed condition
4345
plan = f.with(Predicates.combineAnd(List.of(f.condition(), condition)));
44-
} else if (child instanceof Aggregate agg) { // TODO: re-evaluate along with multi-value support
45-
// Only push [parts of] a filter past an agg if these/it operates on agg's grouping[s], not output.
46-
plan = maybePushDownPastUnary(
47-
filter,
48-
agg,
49-
e -> e instanceof Attribute && agg.output().contains(e) && agg.groupings().contains(e) == false
50-
|| e instanceof AggregateFunction,
51-
NO_OP
52-
);
5346
} else if (child instanceof Eval eval) {
5447
// Don't push if Filter (still) contains references to Eval's fields.
5548
// Account for simple aliases in the Eval, though - these shouldn't stop us.

x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java

Lines changed: 23 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -738,6 +738,7 @@ public void testMultipleCombineLimits() {
738738
);
739739
}
740740

741+
@AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/115311")
741742
public void testSelectivelyPushDownFilterPastRefAgg() {
742743
// expected plan: "from test | where emp_no > 1 and emp_no < 3 | stats x = count(1) by emp_no | where x > 7"
743744
LogicalPlan plan = optimizedPlan("""
@@ -790,6 +791,7 @@ public void testNoPushDownOrFilterPastAgg() {
790791
assertTrue(stats.child() instanceof EsRelation);
791792
}
792793

794+
@AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/115311")
793795
public void testSelectivePushDownComplexFilterPastAgg() {
794796
// expected plan: from test | emp_no > 0 | stats x = count(1) by emp_no | where emp_no < 3 or x > 9
795797
LogicalPlan plan = optimizedPlan("""
@@ -1393,13 +1395,15 @@ public void testPushDownLimitThroughMultipleSort_AfterMvExpand2() {
13931395
}
13941396

13951397
/**
1398+
* TODO: Push down the filter correctly https://github.com/elastic/elasticsearch/issues/115311
1399+
*
13961400
* Expected
13971401
* Limit[5[INTEGER]]
1398-
* \_Aggregate[[first_name{f}#232],[MAX(salary{f}#233) AS max_s, first_name{f}#232]]
1399-
* \_Filter[ISNOTNULL(first_name{f}#232)]
1400-
* \_MvExpand[first_name{f}#232]
1401-
* \_TopN[[Order[emp_no{f}#231,ASC,LAST]],50[INTEGER]]
1402-
* \_EsRelation[employees][emp_no{f}#231, first_name{f}#232, salary{f}#233]
1402+
* \_Filter[ISNOTNULL(first_name{r}#23)]
1403+
* \_Aggregate[STANDARD,[first_name{r}#23],[MAX(salary{f}#18,true[BOOLEAN]) AS max_s, first_name{r}#23]]
1404+
* \_MvExpand[first_name{f}#14,first_name{r}#23]
1405+
* \_TopN[[Order[emp_no{f}#13,ASC,LAST]],50[INTEGER]]
1406+
* \_EsRelation[test][_meta_field{f}#19, emp_no{f}#13, first_name{f}#14, ..]
14031407
*/
14041408
public void testDontPushDownLimitPastAggregate_AndMvExpand() {
14051409
LogicalPlan plan = optimizedPlan("""
@@ -1413,25 +1417,27 @@ public void testDontPushDownLimitPastAggregate_AndMvExpand() {
14131417
| limit 5""");
14141418

14151419
var limit = as(plan, Limit.class);
1420+
var filter = as(limit.child(), Filter.class);
14161421
assertThat(limit.limit().fold(), equalTo(5));
1417-
var agg = as(limit.child(), Aggregate.class);
1418-
var filter = as(agg.child(), Filter.class);
1419-
var mvExp = as(filter.child(), MvExpand.class);
1422+
var agg = as(filter.child(), Aggregate.class);
1423+
var mvExp = as(agg.child(), MvExpand.class);
14201424
var topN = as(mvExp.child(), TopN.class);
14211425
assertThat(topN.limit().fold(), equalTo(50));
14221426
assertThat(orderNames(topN), contains("emp_no"));
14231427
as(topN.child(), EsRelation.class);
14241428
}
14251429

14261430
/**
1431+
* TODO: Push down the filter correctly https://github.com/elastic/elasticsearch/issues/115311
1432+
*
14271433
* Expected
14281434
* Limit[5[INTEGER]]
1429-
* \_Aggregate[[first_name{f}#262],[MAX(salary{f}#263) AS max_s, first_name{f}#262]]
1430-
* \_Filter[ISNOTNULL(first_name{f}#262)]
1431-
* \_Limit[50[INTEGER]]
1432-
* \_MvExpand[first_name{f}#262]
1433-
* \_Limit[50[INTEGER]]
1434-
* \_EsRelation[employees][emp_no{f}#261, first_name{f}#262, salary{f}#263]
1435+
* \_Filter[ISNOTNULL(first_name{r}#22)]
1436+
* \_Aggregate[STANDARD,[first_name{r}#22],[MAX(salary{f}#17,true[BOOLEAN]) AS max_s, first_name{r}#22]]
1437+
* \_Limit[50[INTEGER]]
1438+
* \_MvExpand[first_name{f}#13,first_name{r}#22]
1439+
* \_Limit[50[INTEGER]]
1440+
* \_EsRelation[test][_meta_field{f}#18, emp_no{f}#12, first_name{f}#13, ..]
14351441
*/
14361442
public void testPushDown_TheRightLimit_PastMvExpand() {
14371443
LogicalPlan plan = optimizedPlan("""
@@ -1445,9 +1451,9 @@ public void testPushDown_TheRightLimit_PastMvExpand() {
14451451

14461452
var limit = as(plan, Limit.class);
14471453
assertThat(limit.limit().fold(), equalTo(5));
1448-
var agg = as(limit.child(), Aggregate.class);
1449-
var filter = as(agg.child(), Filter.class);
1450-
limit = as(filter.child(), Limit.class);
1454+
var filter = as(limit.child(), Filter.class);
1455+
var agg = as(filter.child(), Aggregate.class);
1456+
limit = as(agg.child(), Limit.class);
14511457
assertThat(limit.limit().fold(), equalTo(50));
14521458
var mvExp = as(limit.child(), MvExpand.class);
14531459
limit = as(mvExp.child(), Limit.class);

x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PushDownAndCombineFiltersTests.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,7 @@ public void testPushDownLikeRlikeFilter() {
213213

214214
// from ... | where a > 1 | stats count(1) by b | where count(1) >= 3 and b < 2
215215
// => ... | where a > 1 and b < 2 | stats count(1) by b | where count(1) >= 3
216+
@AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/115311")
216217
public void testSelectivelyPushDownFilterPastFunctionAgg() {
217218
EsRelation relation = relation();
218219
GreaterThan conditionA = greaterThanOf(getFieldAttribute("a"), ONE);

0 commit comments

Comments
 (0)