Skip to content

Commit f4aeea2

Browse files
alex-spiesMax Hniebergall
authored andcommitted
ESQL: push down LIMIT past LOOKUP JOIN (#118495) (#118648)
Fix #117698 by enabling push down of `LIMIT` past `LEFT JOIN`s. There is a subtle point here: our `LOOKUP JOIN` currently _exactly preserves the number of rows from the left hand side_. This is different from SQL, where `LEFT JOIN` will return _at least one row for each row from the left_, but may return multiple rows in case of multiple matches. We, instead, throw multiple matches into multi-values, instead. (C.f. [tests that I'm about to add](https://github.com/elastic/elasticsearch/pull/118471/files#diff-334f3328c5f066a093ed8a5ea4a62cd6bcdb304b660b15763bb4f64d0e87ed7cR365-R369) that demonstrate this.) If we were to change our semantics to match SQL's, we'd have to adjust the pushdown, too.
1 parent 26af460 commit f4aeea2

File tree

9 files changed

+77
-35
lines changed

9 files changed

+77
-35
lines changed

x-pack/plugin/esql/qa/server/mixed-cluster/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/mixed/MixedClusterEsqlSpecIT.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
import java.util.List;
2222

2323
import static org.elasticsearch.xpack.esql.CsvTestUtils.isEnabled;
24-
import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.JOIN_LOOKUP_V4;
24+
import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.JOIN_LOOKUP_V5;
2525
import static org.elasticsearch.xpack.esql.qa.rest.EsqlSpecTestCase.Mode.ASYNC;
2626

2727
public class MixedClusterEsqlSpecIT extends EsqlSpecTestCase {
@@ -96,7 +96,7 @@ protected boolean supportsInferenceTestService() {
9696

9797
@Override
9898
protected boolean supportsIndexModeLookup() throws IOException {
99-
return hasCapabilities(List.of(JOIN_LOOKUP_V4.capabilityName()));
99+
return hasCapabilities(List.of(JOIN_LOOKUP_V5.capabilityName()));
100100
}
101101

102102
@Override

x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClusterSpecIT.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@
4848
import static org.elasticsearch.xpack.esql.EsqlTestUtils.classpathResources;
4949
import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.INLINESTATS;
5050
import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.INLINESTATS_V2;
51-
import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.JOIN_LOOKUP_V4;
51+
import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.JOIN_LOOKUP_V5;
5252
import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.JOIN_PLANNING_V1;
5353
import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.METADATA_FIELDS_REMOTE_TEST;
5454
import static org.elasticsearch.xpack.esql.qa.rest.EsqlSpecTestCase.Mode.SYNC;
@@ -124,7 +124,7 @@ protected void shouldSkipTest(String testName) throws IOException {
124124
assumeFalse("INLINESTATS not yet supported in CCS", testCase.requiredCapabilities.contains(INLINESTATS.capabilityName()));
125125
assumeFalse("INLINESTATS not yet supported in CCS", testCase.requiredCapabilities.contains(INLINESTATS_V2.capabilityName()));
126126
assumeFalse("INLINESTATS not yet supported in CCS", testCase.requiredCapabilities.contains(JOIN_PLANNING_V1.capabilityName()));
127-
assumeFalse("LOOKUP JOIN not yet supported in CCS", testCase.requiredCapabilities.contains(JOIN_LOOKUP_V4.capabilityName()));
127+
assumeFalse("LOOKUP JOIN not yet supported in CCS", testCase.requiredCapabilities.contains(JOIN_LOOKUP_V5.capabilityName()));
128128
}
129129

130130
private TestFeatureService remoteFeaturesService() throws IOException {
@@ -283,8 +283,8 @@ protected boolean supportsInferenceTestService() {
283283

284284
@Override
285285
protected boolean supportsIndexModeLookup() throws IOException {
286-
// CCS does not yet support JOIN_LOOKUP_V4 and clusters falsely report they have this capability
287-
// return hasCapabilities(List.of(JOIN_LOOKUP_V4.capabilityName()));
286+
// CCS does not yet support JOIN_LOOKUP_V5 and clusters falsely report they have this capability
287+
// return hasCapabilities(List.of(JOIN_LOOKUP_V5.capabilityName()));
288288
return false;
289289
}
290290
}

x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec

Lines changed: 38 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
//TODO: this sometimes returns null instead of the looked up value (likely related to the execution order)
77
basicOnTheDataNode
8-
required_capability: join_lookup_v4
8+
required_capability: join_lookup_v5
99

1010
FROM employees
1111
| EVAL language_code = languages
@@ -22,7 +22,7 @@ emp_no:integer | language_code:integer | language_name:keyword
2222
;
2323

2424
basicRow
25-
required_capability: join_lookup_v4
25+
required_capability: join_lookup_v5
2626

2727
ROW language_code = 1
2828
| LOOKUP JOIN languages_lookup ON language_code
@@ -33,7 +33,7 @@ language_code:integer | language_name:keyword
3333
;
3434

3535
basicOnTheCoordinator
36-
required_capability: join_lookup_v4
36+
required_capability: join_lookup_v5
3737

3838
FROM employees
3939
| SORT emp_no
@@ -50,7 +50,7 @@ emp_no:integer | language_code:integer | language_name:keyword
5050
;
5151

5252
subsequentEvalOnTheDataNode
53-
required_capability: join_lookup_v4
53+
required_capability: join_lookup_v5
5454

5555
FROM employees
5656
| EVAL language_code = languages
@@ -68,7 +68,7 @@ emp_no:integer | language_code:integer | language_name:keyword | language_code_x
6868
;
6969

7070
subsequentEvalOnTheCoordinator
71-
required_capability: join_lookup_v4
71+
required_capability: join_lookup_v5
7272

7373
FROM employees
7474
| SORT emp_no
@@ -85,8 +85,25 @@ emp_no:integer | language_code:integer | language_name:keyword | language_code_x
8585
10003 | 4 | german | 8
8686
;
8787

88+
sortEvalBeforeLookup
89+
required_capability: join_lookup_v5
90+
91+
FROM employees
92+
| SORT emp_no
93+
| EVAL language_code = (emp_no % 10) + 1
94+
| LOOKUP JOIN languages_lookup ON language_code
95+
| KEEP emp_no, language_code, language_name
96+
| LIMIT 3
97+
;
98+
99+
emp_no:integer | language_code:integer | language_name:keyword
100+
10001 | 2 | French
101+
10002 | 3 | Spanish
102+
10003 | 4 | German
103+
;
104+
88105
lookupIPFromRow
89-
required_capability: join_lookup_v4
106+
required_capability: join_lookup_v5
90107

91108
ROW left = "left", client_ip = "172.21.0.5", right = "right"
92109
| LOOKUP JOIN clientips_lookup ON client_ip
@@ -97,7 +114,7 @@ left | 172.21.0.5 | right | Development
97114
;
98115

99116
lookupIPFromRowWithShadowing
100-
required_capability: join_lookup_v4
117+
required_capability: join_lookup_v5
101118

102119
ROW left = "left", client_ip = "172.21.0.5", env = "env", right = "right"
103120
| LOOKUP JOIN clientips_lookup ON client_ip
@@ -108,7 +125,7 @@ left | 172.21.0.5 | right | Development
108125
;
109126

110127
lookupIPFromRowWithShadowingKeep
111-
required_capability: join_lookup_v4
128+
required_capability: join_lookup_v5
112129

113130
ROW left = "left", client_ip = "172.21.0.5", env = "env", right = "right"
114131
| EVAL client_ip = client_ip::keyword
@@ -121,7 +138,7 @@ left | 172.21.0.5 | right | Development
121138
;
122139

123140
lookupIPFromRowWithShadowingKeepReordered
124-
required_capability: join_lookup_v4
141+
required_capability: join_lookup_v5
125142

126143
ROW left = "left", client_ip = "172.21.0.5", env = "env", right = "right"
127144
| EVAL client_ip = client_ip::keyword
@@ -134,7 +151,7 @@ right | Development | 172.21.0.5
134151
;
135152

136153
lookupIPFromIndex
137-
required_capability: join_lookup_v4
154+
required_capability: join_lookup_v5
138155

139156
FROM sample_data
140157
| EVAL client_ip = client_ip::keyword
@@ -153,7 +170,7 @@ ignoreOrder:true
153170
;
154171

155172
lookupIPFromIndexKeep
156-
required_capability: join_lookup_v4
173+
required_capability: join_lookup_v5
157174

158175
FROM sample_data
159176
| EVAL client_ip = client_ip::keyword
@@ -173,7 +190,7 @@ ignoreOrder:true
173190
;
174191

175192
lookupIPFromIndexStats
176-
required_capability: join_lookup_v4
193+
required_capability: join_lookup_v5
177194

178195
FROM sample_data
179196
| EVAL client_ip = client_ip::keyword
@@ -189,7 +206,7 @@ count:long | env:keyword
189206
;
190207

191208
lookupIPFromIndexStatsKeep
192-
required_capability: join_lookup_v4
209+
required_capability: join_lookup_v5
193210

194211
FROM sample_data
195212
| EVAL client_ip = client_ip::keyword
@@ -206,7 +223,7 @@ count:long | env:keyword
206223
;
207224

208225
lookupMessageFromRow
209-
required_capability: join_lookup_v4
226+
required_capability: join_lookup_v5
210227

211228
ROW left = "left", message = "Connected to 10.1.0.1", right = "right"
212229
| LOOKUP JOIN message_types_lookup ON message
@@ -217,7 +234,7 @@ left | Connected to 10.1.0.1 | right | Success
217234
;
218235

219236
lookupMessageFromRowWithShadowing
220-
required_capability: join_lookup_v4
237+
required_capability: join_lookup_v5
221238

222239
ROW left = "left", message = "Connected to 10.1.0.1", type = "unknown", right = "right"
223240
| LOOKUP JOIN message_types_lookup ON message
@@ -228,7 +245,7 @@ left | Connected to 10.1.0.1 | right | Success
228245
;
229246

230247
lookupMessageFromRowWithShadowingKeep
231-
required_capability: join_lookup_v4
248+
required_capability: join_lookup_v5
232249

233250
ROW left = "left", message = "Connected to 10.1.0.1", type = "unknown", right = "right"
234251
| LOOKUP JOIN message_types_lookup ON message
@@ -240,7 +257,7 @@ left | Connected to 10.1.0.1 | right | Success
240257
;
241258

242259
lookupMessageFromIndex
243-
required_capability: join_lookup_v4
260+
required_capability: join_lookup_v5
244261

245262
FROM sample_data
246263
| LOOKUP JOIN message_types_lookup ON message
@@ -258,7 +275,7 @@ ignoreOrder:true
258275
;
259276

260277
lookupMessageFromIndexKeep
261-
required_capability: join_lookup_v4
278+
required_capability: join_lookup_v5
262279

263280
FROM sample_data
264281
| LOOKUP JOIN message_types_lookup ON message
@@ -277,7 +294,7 @@ ignoreOrder:true
277294
;
278295

279296
lookupMessageFromIndexKeepReordered
280-
required_capability: join_lookup_v4
297+
required_capability: join_lookup_v5
281298

282299
FROM sample_data
283300
| LOOKUP JOIN message_types_lookup ON message
@@ -296,7 +313,7 @@ Success | 172.21.2.162 | 3450233 | Connected to 10.1.0.3
296313
;
297314

298315
lookupMessageFromIndexStats
299-
required_capability: join_lookup_v4
316+
required_capability: join_lookup_v5
300317

301318
FROM sample_data
302319
| LOOKUP JOIN message_types_lookup ON message
@@ -311,7 +328,7 @@ count:long | type:keyword
311328
;
312329

313330
lookupMessageFromIndexStatsKeep
314-
required_capability: join_lookup_v4
331+
required_capability: join_lookup_v5
315332

316333
FROM sample_data
317334
| LOOKUP JOIN message_types_lookup ON message

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -527,7 +527,7 @@ public enum Cap {
527527
/**
528528
* LOOKUP JOIN
529529
*/
530-
JOIN_LOOKUP_V4(Build.current().isSnapshot()),
530+
JOIN_LOOKUP_V5(Build.current().isSnapshot()),
531531

532532
/**
533533
* Fix for https://github.com/elastic/elasticsearch/issues/117054

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PushDownAndCombineLimits.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
import org.elasticsearch.xpack.esql.plan.logical.UnaryPlan;
2020
import org.elasticsearch.xpack.esql.plan.logical.join.Join;
2121
import org.elasticsearch.xpack.esql.plan.logical.join.JoinTypes;
22-
import org.elasticsearch.xpack.esql.plan.logical.local.LocalRelation;
2322

2423
public final class PushDownAndCombineLimits extends OptimizerRules.OptimizerRule<Limit> {
2524

@@ -63,8 +62,10 @@ public LogicalPlan rule(Limit limit) {
6362
}
6463
}
6564
} else if (limit.child() instanceof Join join) {
66-
if (join.config().type() == JoinTypes.LEFT && join.right() instanceof LocalRelation) {
67-
// This is a hash join from something like a lookup.
65+
if (join.config().type() == JoinTypes.LEFT) {
66+
// NOTE! This is only correct because our LEFT JOINs preserve the number of rows from the left hand side.
67+
// This deviates from SQL semantics. In SQL, multiple matches on the right hand side lead to multiple rows in the output.
68+
// For us, multiple matches on the right hand side are collected into multi-values.
6869
return join.replaceChildren(limit.replaceChild(join.left()), join.right());
6970
}
7071
}

x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -260,7 +260,7 @@ public final void test() throws Throwable {
260260
);
261261
assumeFalse(
262262
"lookup join disabled for csv tests",
263-
testCase.requiredCapabilities.contains(EsqlCapabilities.Cap.JOIN_LOOKUP_V4.capabilityName())
263+
testCase.requiredCapabilities.contains(EsqlCapabilities.Cap.JOIN_LOOKUP_V5.capabilityName())
264264
);
265265
assumeFalse(
266266
"can't use TERM function in csv tests",

x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2146,7 +2146,7 @@ public void testLookupMatchTypeWrong() {
21462146
}
21472147

21482148
public void testLookupJoinUnknownIndex() {
2149-
assumeTrue("requires LOOKUP JOIN capability", EsqlCapabilities.Cap.JOIN_LOOKUP_V4.isEnabled());
2149+
assumeTrue("requires LOOKUP JOIN capability", EsqlCapabilities.Cap.JOIN_LOOKUP_V5.isEnabled());
21502150

21512151
String errorMessage = "Unknown index [foobar]";
21522152
IndexResolution missingLookupIndex = IndexResolution.invalid(errorMessage);
@@ -2175,7 +2175,7 @@ public void testLookupJoinUnknownIndex() {
21752175
}
21762176

21772177
public void testLookupJoinUnknownField() {
2178-
assumeTrue("requires LOOKUP JOIN capability", EsqlCapabilities.Cap.JOIN_LOOKUP_V4.isEnabled());
2178+
assumeTrue("requires LOOKUP JOIN capability", EsqlCapabilities.Cap.JOIN_LOOKUP_V5.isEnabled());
21792179

21802180
String query = "FROM test | LOOKUP JOIN languages_lookup ON last_name";
21812181
String errorMessage = "1:45: Unknown column [last_name] in right side of join";

x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1989,7 +1989,7 @@ public void testSortByAggregate() {
19891989
}
19901990

19911991
public void testLookupJoinDataTypeMismatch() {
1992-
assumeTrue("requires LOOKUP JOIN capability", EsqlCapabilities.Cap.JOIN_LOOKUP_V4.isEnabled());
1992+
assumeTrue("requires LOOKUP JOIN capability", EsqlCapabilities.Cap.JOIN_LOOKUP_V5.isEnabled());
19931993

19941994
query("FROM test | EVAL language_code = languages | LOOKUP JOIN languages_lookup ON language_code");
19951995

x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
import org.elasticsearch.xpack.esql.core.expression.predicate.logical.Or;
4141
import org.elasticsearch.xpack.esql.core.expression.predicate.nulls.IsNotNull;
4242
import org.elasticsearch.xpack.esql.core.expression.predicate.operator.comparison.BinaryComparison;
43+
import org.elasticsearch.xpack.esql.core.tree.Source;
4344
import org.elasticsearch.xpack.esql.core.type.DataType;
4445
import org.elasticsearch.xpack.esql.core.type.EsField;
4546
import org.elasticsearch.xpack.esql.core.util.Holder;
@@ -112,7 +113,9 @@
112113
import org.elasticsearch.xpack.esql.plan.logical.UnaryPlan;
113114
import org.elasticsearch.xpack.esql.plan.logical.join.InlineJoin;
114115
import org.elasticsearch.xpack.esql.plan.logical.join.Join;
116+
import org.elasticsearch.xpack.esql.plan.logical.join.JoinConfig;
115117
import org.elasticsearch.xpack.esql.plan.logical.join.JoinTypes;
118+
import org.elasticsearch.xpack.esql.plan.logical.join.LookupJoin;
116119
import org.elasticsearch.xpack.esql.plan.logical.local.EsqlProject;
117120
import org.elasticsearch.xpack.esql.plan.logical.local.LocalRelation;
118121
import org.elasticsearch.xpack.esql.plan.logical.local.LocalSupplier;
@@ -138,6 +141,7 @@
138141
import static org.elasticsearch.xpack.esql.EsqlTestUtils.TWO;
139142
import static org.elasticsearch.xpack.esql.EsqlTestUtils.as;
140143
import static org.elasticsearch.xpack.esql.EsqlTestUtils.emptySource;
144+
import static org.elasticsearch.xpack.esql.EsqlTestUtils.fieldAttribute;
141145
import static org.elasticsearch.xpack.esql.EsqlTestUtils.getFieldAttribute;
142146
import static org.elasticsearch.xpack.esql.EsqlTestUtils.loadMapping;
143147
import static org.elasticsearch.xpack.esql.EsqlTestUtils.localSource;
@@ -1291,6 +1295,26 @@ public void testCombineLimits() {
12911295
);
12921296
}
12931297

1298+
public void testPushdownLimitsPastLeftJoin() {
1299+
var leftChild = emptySource();
1300+
var rightChild = new LocalRelation(Source.EMPTY, List.of(fieldAttribute()), LocalSupplier.EMPTY);
1301+
assertNotEquals(leftChild, rightChild);
1302+
1303+
var joinConfig = new JoinConfig(JoinTypes.LEFT, List.of(), List.of(), List.of());
1304+
var join = switch (randomIntBetween(0, 2)) {
1305+
case 0 -> new Join(EMPTY, leftChild, rightChild, joinConfig);
1306+
case 1 -> new LookupJoin(EMPTY, leftChild, rightChild, joinConfig);
1307+
case 2 -> new InlineJoin(EMPTY, leftChild, rightChild, joinConfig);
1308+
default -> throw new IllegalArgumentException();
1309+
};
1310+
1311+
var limit = new Limit(EMPTY, L(10), join);
1312+
1313+
var optimizedPlan = new PushDownAndCombineLimits().rule(limit);
1314+
1315+
assertEquals(join.replaceChildren(limit.replaceChild(join.left()), join.right()), optimizedPlan);
1316+
}
1317+
12941318
public void testMultipleCombineLimits() {
12951319
var numberOfLimits = randomIntBetween(3, 10);
12961320
var minimum = randomIntBetween(10, 99);

0 commit comments

Comments
 (0)