Skip to content

Commit 9116f27

Browse files
pmpailischrisparrinello
authored andcommitted
[ES|QL] pushing down eval expression when it requires data access (elastic#136610)
1 parent e6ce7d7 commit 9116f27

File tree

9 files changed

+422
-9
lines changed

9 files changed

+422
-9
lines changed

docs/changelog/136610.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 136610
2+
summary: Pushing down eval expression when it requires data access
3+
area: ES|QL
4+
type: bug
5+
issues:
6+
- 133462

x-pack/plugin/esql/qa/testFixtures/src/main/resources/score-function.csv-spec

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,3 +125,78 @@ book_no:keyword | title:text
125125
2714 | Return of the King Being the Third Part of The Lord of the Rings | 1.9245924949645996
126126
7350 | Return of the Shadow | 0.0
127127
;
128+
129+
evalUsingScoreAndLimit
130+
required_capability: score_function
131+
required_capability: match_function
132+
required_capability: pushing_down_eval_with_score
133+
134+
FROM books
135+
| EVAL title_match = SCORE(MATCH(title, "aardvark"))
136+
| LIMIT 2
137+
| KEEP title_match
138+
;
139+
140+
title_match:double
141+
0
142+
0
143+
;
144+
145+
evalUsingScoreAndLimit2
146+
required_capability: score_function
147+
required_capability: match_function
148+
required_capability: pushing_down_eval_with_score
149+
150+
FROM books
151+
| WHERE author == "J R R Tolkien"
152+
| EVAL title_match_ring = SCORE(MATCH(title, "ring")), title_match_hobbit = SCORE(MATCH(title, "hobbit"))
153+
| keep title_match_ring, title_match_hobbit
154+
;
155+
156+
title_match_ring:double | title_match_hobbit:double
157+
0 | 3.45605206489563
158+
;
159+
160+
evalUsingScoreAndTopN
161+
required_capability: score_function
162+
required_capability: match_function
163+
required_capability: pushing_down_eval_with_score
164+
165+
FROM books
166+
| SORT book_no desc
167+
| EVAL title_match_towers = SCORE(MATCH(title, "towers"))
168+
| limit 10
169+
| keep book_no, title_match_towers
170+
;
171+
172+
book_no:keyword | title_match_towers:double
173+
9896 | 0
174+
9801 | 0
175+
9607 | 0
176+
9032 | 0
177+
8956 | 0
178+
8875 | 3.5007452964782715
179+
8873 | 0
180+
8678 | 0
181+
8615 | 0
182+
8605 | 0
183+
;
184+
185+
evalUsingScoreAndFilter
186+
required_capability: score_function
187+
required_capability: match_function
188+
required_capability: pushing_down_eval_with_score
189+
190+
FROM books
191+
| sort book_no desc
192+
| EVAL title_match_world = SCORE(MATCH(title, "world"))
193+
| WHERE title_match_world > 0 and to_integer(book_no) < 8500
194+
| LIMIT 10
195+
| KEEP book_no, title_match_world
196+
| SORT book_no asc
197+
| LIMIT 1
198+
;
199+
200+
book_no:keyword | title_match_world:double
201+
7912 | 2.7460334300994873
202+
;

x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/ScoreFunctionIT.java

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import java.util.Collection;
2222
import java.util.List;
2323

24+
import static org.elasticsearch.common.collect.Iterators.toList;
2425
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
2526
import static org.hamcrest.CoreMatchers.containsString;
2627

@@ -33,6 +34,58 @@ public void setupIndex() {
3334
createAndPopulateIndex();
3435
}
3536

37+
public void testPushingDownEvalWithScore() {
38+
var query = """
39+
FROM test
40+
| EVAL first_score = score(match(content, "dog")), s = [1]
41+
| LIMIT 2
42+
""";
43+
44+
try (var resp = run(query)) {
45+
assertColumnTypes(resp.columns(), List.of("text", "integer", "double", "integer"));
46+
assertColumnNames(resp.columns(), List.of("content", "id", "first_score", "s"));
47+
var results = resp.values();
48+
final int expectedNumRows = 2;
49+
final int expectedNumColumns = 4;
50+
for (int i = 0; i < expectedNumRows; i++) {
51+
var row = toList(results.next());
52+
assertEquals(expectedNumColumns, row.size());
53+
for (var column : row) {
54+
assertNotNull(column);
55+
}
56+
}
57+
if (results.hasNext()) {
58+
fail("Expected only 2 rows but got more");
59+
}
60+
}
61+
}
62+
63+
public void testPushingDownEvalWithNestedScore() {
64+
var query = """
65+
FROM test
66+
| EVAL first_score = TO_INTEGER(0.2 + SCORE(MATCH(content, "dog")))
67+
| LIMIT 3
68+
""";
69+
70+
try (var resp = run(query)) {
71+
assertColumnTypes(resp.columns(), List.of("text", "integer", "integer"));
72+
assertColumnNames(resp.columns(), List.of("content", "id", "first_score"));
73+
var results = resp.values();
74+
final int expectedRows = 3;
75+
final int expectedColumns = 3;
76+
for (int i = 0; i < expectedRows; i++) {
77+
var row = toList(results.next());
78+
assertEquals(expectedColumns, row.size());
79+
for (var column : row) {
80+
assertNotNull(column);
81+
}
82+
}
83+
if (results.hasNext()) {
84+
fail("Expected only 2 rows but got more");
85+
}
86+
}
87+
}
88+
3689
public void testScoreSingleNoMetadata() {
3790
var query = """
3891
FROM test

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1508,7 +1508,7 @@ public enum Cap {
15081508

15091509
/**
15101510
* Support for the literal {@code m} suffix as an alias for {@code minute} in temporal amounts.
1511-
*/
1511+
*/
15121512
TEMPORAL_AMOUNT_M,
15131513

15141514
/**
@@ -1529,7 +1529,13 @@ public enum Cap {
15291529
/**
15301530
* Fix double release in inline stats when LocalRelation is reused
15311531
*/
1532-
INLINE_STATS_DOUBLE_RELEASE_FIX(INLINESTATS_V11.enabled)
1532+
INLINE_STATS_DOUBLE_RELEASE_FIX(INLINESTATS_V11.enabled),
1533+
1534+
/**
1535+
* Support for pushing down EVAL with SCORE
1536+
* https://github.com/elastic/elasticsearch/issues/133462
1537+
*/
1538+
PUSHING_DOWN_EVAL_WITH_SCORE
15331539

15341540
;
15351541

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,7 @@ protected static Batch<LogicalPlan> operators(boolean local) {
192192
new PruneFilters(),
193193
new PruneColumns(),
194194
new PruneLiteralsInOrderBy(),
195-
new PushDownAndCombineLimits(),
195+
new PushDownAndCombineLimits(local),
196196
new PushLimitToKnn(),
197197
new PushDownAndCombineFilters(),
198198
new PushDownConjunctionsToKnnPrefilters(),

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PushDownAndCombineLimits.java

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@
77

88
package org.elasticsearch.xpack.esql.optimizer.rules.logical;
99

10+
import org.elasticsearch.xpack.esql.core.expression.Alias;
11+
import org.elasticsearch.xpack.esql.core.util.Holder;
12+
import org.elasticsearch.xpack.esql.expression.function.fulltext.Score;
1013
import org.elasticsearch.xpack.esql.optimizer.LogicalOptimizerContext;
1114
import org.elasticsearch.xpack.esql.plan.logical.Aggregate;
1215
import org.elasticsearch.xpack.esql.plan.logical.Enrich;
@@ -27,8 +30,11 @@
2730

2831
public final class PushDownAndCombineLimits extends OptimizerRules.ParameterizedOptimizerRule<Limit, LogicalOptimizerContext> {
2932

30-
public PushDownAndCombineLimits() {
33+
private final boolean local;
34+
35+
public PushDownAndCombineLimits(boolean local) {
3136
super(OptimizerRules.TransformDirection.DOWN);
37+
this.local = local;
3238
}
3339

3440
@Override
@@ -45,7 +51,12 @@ public LogicalPlan rule(Limit limit, LogicalOptimizerContext ctx) {
4551
|| unary instanceof RegexExtract
4652
|| unary instanceof Enrich
4753
|| unary instanceof InferencePlan<?>) {
48-
return unary.replaceChild(limit.replaceChild(unary.child()));
54+
if (false == local && unary instanceof Eval && evalAliasNeedsData((Eval) unary)) {
55+
// do not push down the limit through an eval that needs data (e.g. a score function) during initial planning
56+
return limit;
57+
} else {
58+
return unary.replaceChild(limit.replaceChild(unary.child()));
59+
}
4960
} else if (unary instanceof MvExpand) {
5061
// MV_EXPAND can increase the number of rows, so we cannot just push the limit down
5162
// (we also have to preserve the LIMIT afterwards)
@@ -76,6 +87,21 @@ public LogicalPlan rule(Limit limit, LogicalOptimizerContext ctx) {
7687
return limit;
7788
}
7889

90+
/**
91+
* Determines if the provided {@link Alias} expression depends on document data by traversing its expression tree.
92+
* Returns {@code true} if any child expression requires access to document-specific values, such as the {@link Score} function.
93+
* This is used to prevent pushing down limits past operations that need to evaluate expressions using document data.
94+
*/
95+
private boolean evalAliasNeedsData(Eval eval) {
96+
Holder<Boolean> hasScore = new Holder<>(false);
97+
eval.forEachExpression(expr -> {
98+
if (expr instanceof Score) {
99+
hasScore.set(true);
100+
}
101+
});
102+
return hasScore.get();
103+
}
104+
79105
/**
80106
* Checks the existence of another 'visible' Limit, that exists behind an operation that doesn't produce output more data than
81107
* its input (that is not a relation/source nor aggregation).

x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@
5858
import org.elasticsearch.xpack.esql.expression.function.aggregate.SummationMode;
5959
import org.elasticsearch.xpack.esql.expression.function.fulltext.Match;
6060
import org.elasticsearch.xpack.esql.expression.function.fulltext.MultiMatch;
61+
import org.elasticsearch.xpack.esql.expression.function.fulltext.Score;
6162
import org.elasticsearch.xpack.esql.expression.function.grouping.Bucket;
6263
import org.elasticsearch.xpack.esql.expression.function.grouping.Categorize;
6364
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDouble;
@@ -209,6 +210,33 @@
209210
public class LogicalPlanOptimizerTests extends AbstractLogicalPlanOptimizerTests {
210211
private static final LiteralsOnTheRight LITERALS_ON_THE_RIGHT = new LiteralsOnTheRight();
211212

213+
public void testEvalWithScoreImplicitLimit() {
214+
var plan = plan("""
215+
FROM test
216+
| EVAL s = SCORE(MATCH(last_name, "high"))
217+
""");
218+
var limit = as(plan, Limit.class);
219+
assertThat(limit.child(), instanceOf(Eval.class));
220+
assertThat(((Literal) limit.limit()).value(), equalTo(1000));
221+
var eval = as(limit.child(), Eval.class);
222+
assertThat(eval.fields().size(), equalTo(1));
223+
assertThat(eval.fields().get(0).child(), instanceOf(Score.class));
224+
}
225+
226+
public void testEvalWithScoreExplicitLimit() {
227+
var plan = plan("""
228+
FROM test
229+
| EVAL s = SCORE(MATCH(last_name, "high"))
230+
| LIMIT 42
231+
""");
232+
var limit = as(plan, Limit.class);
233+
assertThat(limit.child(), instanceOf(Eval.class));
234+
assertThat(((Literal) limit.limit()).value(), equalTo(42));
235+
var eval = as(limit.child(), Eval.class);
236+
assertThat(eval.fields().size(), equalTo(1));
237+
assertThat(eval.fields().get(0).child(), instanceOf(Score.class));
238+
}
239+
212240
public void testEmptyProjections() {
213241
var plan = plan("""
214242
from test
@@ -1032,12 +1060,12 @@ public void testCombineLimits() {
10321060
var anotherLimit = new Limit(EMPTY, L(limitValues[secondLimit]), oneLimit);
10331061
assertEquals(
10341062
new Limit(EMPTY, L(Math.min(limitValues[0], limitValues[1])), emptySource()),
1035-
new PushDownAndCombineLimits().rule(anotherLimit, logicalOptimizerCtx)
1063+
new PushDownAndCombineLimits(false).rule(anotherLimit, logicalOptimizerCtx)
10361064
);
10371065
}
10381066

10391067
public void testPushdownLimitsPastLeftJoin() {
1040-
var rule = new PushDownAndCombineLimits();
1068+
var rule = new PushDownAndCombineLimits(false);
10411069

10421070
var leftChild = emptySource();
10431071
var rightChild = new LocalRelation(Source.EMPTY, List.of(fieldAttribute()), EmptyLocalSupplier.EMPTY);

0 commit comments

Comments
 (0)