Skip to content

Commit 5afbfda

Browse files
[8.x] Backport two PRs (#117246) (#117843) (#117967)
* LOOKUP JOIN using field-caps for field mapping (#117246) * LOOKUP JOIN using field-caps for field mapping Removes the hard-coded hack for languages_lookup, and instead does a field-caps check for the real join index. * Update docs/changelog/117246.yaml * Some code review comments * Enhance LOOKUP JOIN csv-spec tests to cover more cases and fix several bugs found (#117843) Adds several more tests to lookup-join.csv-spec, and fixes the following bugs: * FieldCaps on the right-hand side should ignore the fieldNames method and just use "*" because currently the fieldNames search cannot handle lookup fields with aliases (should be fixed in a follow-up PR). * Stop using the lookup index in the ComputeService (so we don’t get data from both indices coming in from the left, and other weird behaviour). * Ignore failing SearchStats checks on fields from the right-hand side in the logical planner (so it does not plan EVAL field = null for all right-hand fields). This should be fixed properly with the correct updates to TransportSearchShardsAction (or rather by making multiple uses of it, one for each branch of the execution model). * Don't load indices with mode:lookup due to cluster state errors in mixed clusters * Disable all lookup-join tests on 8.x, due to issues with cluster state * Spotless apply
1 parent b8afe64 commit 5afbfda

File tree

24 files changed

+494
-122
lines changed

24 files changed

+494
-122
lines changed

docs/changelog/117246.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 117246
2+
summary: LOOKUP JOIN using field-caps for field mapping
3+
area: ES|QL
4+
type: enhancement
5+
issues: []

x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClusterSpecIT.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@
4848
import static org.elasticsearch.xpack.esql.EsqlTestUtils.classpathResources;
4949
import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.INLINESTATS;
5050
import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.INLINESTATS_V2;
51-
import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.JOIN_LOOKUP_V3;
51+
import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.JOIN_LOOKUP_V4;
5252
import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.JOIN_PLANNING_V1;
5353
import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.METADATA_FIELDS_REMOTE_TEST;
5454
import static org.elasticsearch.xpack.esql.qa.rest.EsqlSpecTestCase.Mode.SYNC;
@@ -124,7 +124,7 @@ protected void shouldSkipTest(String testName) throws IOException {
124124
assumeFalse("INLINESTATS not yet supported in CCS", testCase.requiredCapabilities.contains(INLINESTATS.capabilityName()));
125125
assumeFalse("INLINESTATS not yet supported in CCS", testCase.requiredCapabilities.contains(INLINESTATS_V2.capabilityName()));
126126
assumeFalse("INLINESTATS not yet supported in CCS", testCase.requiredCapabilities.contains(JOIN_PLANNING_V1.capabilityName()));
127-
assumeFalse("LOOKUP JOIN not yet supported in CCS", testCase.requiredCapabilities.contains(JOIN_LOOKUP_V3.capabilityName()));
127+
assumeFalse("LOOKUP JOIN not yet supported in CCS", testCase.requiredCapabilities.contains(JOIN_LOOKUP_V4.capabilityName()));
128128
}
129129

130130
private TestFeatureService remoteFeaturesService() throws IOException {

x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,8 @@ public class CsvTestsDataLoader {
5656
private static final TestsDataset APPS = new TestsDataset("apps");
5757
private static final TestsDataset APPS_SHORT = APPS.withIndex("apps_short").withTypeMapping(Map.of("id", "short"));
5858
private static final TestsDataset LANGUAGES = new TestsDataset("languages");
59+
// private static final TestsDataset LANGUAGES_LOOKUP = LANGUAGES.withIndex("languages_lookup")
60+
// .withSetting("languages_lookup-settings.json");
5961
private static final TestsDataset ALERTS = new TestsDataset("alerts");
6062
private static final TestsDataset UL_LOGS = new TestsDataset("ul_logs");
6163
private static final TestsDataset SAMPLE_DATA = new TestsDataset("sample_data");
@@ -70,6 +72,11 @@ public class CsvTestsDataLoader {
7072
.withTypeMapping(Map.of("@timestamp", "date_nanos"));
7173
private static final TestsDataset MISSING_IP_SAMPLE_DATA = new TestsDataset("missing_ip_sample_data");
7274
private static final TestsDataset CLIENT_IPS = new TestsDataset("clientips");
75+
// private static final TestsDataset CLIENT_IPS_LOOKUP = CLIENT_IPS.withIndex("clientips_lookup")
76+
// .withSetting("clientips_lookup-settings.json");
77+
private static final TestsDataset MESSAGE_TYPES = new TestsDataset("message_types");
78+
// private static final TestsDataset MESSAGE_TYPES_LOOKUP = MESSAGE_TYPES.withIndex("message_types_lookup")
79+
// .withSetting("message_types_lookup-settings.json");
7380
private static final TestsDataset CLIENT_CIDR = new TestsDataset("client_cidr");
7481
private static final TestsDataset AGES = new TestsDataset("ages");
7582
private static final TestsDataset HEIGHTS = new TestsDataset("heights");
@@ -94,14 +101,13 @@ public class CsvTestsDataLoader {
94101
private static final TestsDataset BOOKS = new TestsDataset("books");
95102
private static final TestsDataset SEMANTIC_TEXT = new TestsDataset("semantic_text").withInferenceEndpoint(true);
96103

97-
private static final String LOOKUP_INDEX_SUFFIX = "_lookup";
98-
99104
public static final Map<String, TestsDataset> CSV_DATASET_MAP = Map.ofEntries(
100105
Map.entry(EMPLOYEES.indexName, EMPLOYEES),
101106
Map.entry(HOSTS.indexName, HOSTS),
102107
Map.entry(APPS.indexName, APPS),
103108
Map.entry(APPS_SHORT.indexName, APPS_SHORT),
104109
Map.entry(LANGUAGES.indexName, LANGUAGES),
110+
// Map.entry(LANGUAGES_LOOKUP.indexName, LANGUAGES_LOOKUP),
105111
Map.entry(UL_LOGS.indexName, UL_LOGS),
106112
Map.entry(SAMPLE_DATA.indexName, SAMPLE_DATA),
107113
Map.entry(MV_SAMPLE_DATA.indexName, MV_SAMPLE_DATA),
@@ -111,6 +117,9 @@ public class CsvTestsDataLoader {
111117
Map.entry(SAMPLE_DATA_TS_NANOS.indexName, SAMPLE_DATA_TS_NANOS),
112118
Map.entry(MISSING_IP_SAMPLE_DATA.indexName, MISSING_IP_SAMPLE_DATA),
113119
Map.entry(CLIENT_IPS.indexName, CLIENT_IPS),
120+
// Map.entry(CLIENT_IPS_LOOKUP.indexName, CLIENT_IPS_LOOKUP),
121+
Map.entry(MESSAGE_TYPES.indexName, MESSAGE_TYPES),
122+
// Map.entry(MESSAGE_TYPES_LOOKUP.indexName, MESSAGE_TYPES_LOOKUP),
114123
Map.entry(CLIENT_CIDR.indexName, CLIENT_CIDR),
115124
Map.entry(AGES.indexName, AGES),
116125
Map.entry(HEIGHTS.indexName, HEIGHTS),
@@ -132,9 +141,7 @@ public class CsvTestsDataLoader {
132141
Map.entry(DISTANCES.indexName, DISTANCES),
133142
Map.entry(ADDRESSES.indexName, ADDRESSES),
134143
Map.entry(BOOKS.indexName, BOOKS),
135-
Map.entry(SEMANTIC_TEXT.indexName, SEMANTIC_TEXT),
136-
// JOIN LOOKUP alias
137-
Map.entry(LANGUAGES.indexName + LOOKUP_INDEX_SUFFIX, LANGUAGES.withIndex(LANGUAGES.indexName + LOOKUP_INDEX_SUFFIX))
144+
Map.entry(SEMANTIC_TEXT.indexName, SEMANTIC_TEXT)
138145
);
139146

140147
private static final EnrichConfig LANGUAGES_ENRICH = new EnrichConfig("languages_policy", "enrich-policy-languages.json");
@@ -174,13 +181,14 @@ public class CsvTestsDataLoader {
174181
* </p>
175182
* <p>
176183
* Accepts an URL as first argument, eg. http://localhost:9200 or http://user:pass@localhost:9200
177-
*</p>
184+
* </p>
178185
* <p>
179186
* If no arguments are specified, the default URL is http://localhost:9200 without authentication
180187
* </p>
181188
* <p>
182189
* It also supports HTTPS
183190
* </p>
191+
*
184192
* @param args the URL to connect
185193
* @throws IOException
186194
*/
@@ -270,7 +278,9 @@ private static void loadDataSetIntoEs(RestClient client, IndexCreator indexCreat
270278
}
271279
}
272280

273-
/** The semantic_text mapping type require an inference endpoint that needs to be setup before creating the index. */
281+
/**
282+
* The semantic_text mapping type requires an inference endpoint that needs to be set up before creating the index.
283+
*/
274284
public static void createInferenceEndpoint(RestClient client) throws IOException {
275285
Request request = new Request("PUT", "_inference/sparse_embedding/test_sparse_inference");
276286
request.setJsonEntity("""
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
{
2+
"index": {
3+
"mode": "lookup"
4+
}
5+
}

x-pack/plugin/esql/qa/testFixtures/src/main/resources/languages.csv

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
language_code:keyword,language_name:keyword
1+
language_code:integer,language_name:keyword
22
1,English
33
2,French
44
3,Spanish
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
{
2+
"index": {
3+
"mode": "lookup"
4+
}
5+
}

x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec

Lines changed: 214 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@
44
//
55

66
//TODO: this sometimes returns null instead of the looked up value (likely related to the execution order)
7-
basicOnTheDataNode-Ignore
8-
required_capability: join_lookup_v3
7+
basicOnTheDataNode
8+
required_capability: join_lookup_v4
99

1010
FROM employees
1111
| EVAL language_code = languages
@@ -21,19 +21,19 @@ emp_no:integer | language_code:integer | language_name:keyword
2121
10093 | 3 | Spanish
2222
;
2323

24-
basicRow-Ignore
25-
required_capability: join_lookup_v3
24+
basicRow
25+
required_capability: join_lookup_v4
2626

2727
ROW language_code = 1
2828
| LOOKUP JOIN languages_lookup ON language_code
2929
;
3030

31-
language_code:keyword | language_name:keyword
31+
language_code:integer | language_name:keyword
3232
1 | English
3333
;
3434

3535
basicOnTheCoordinator
36-
required_capability: join_lookup_v3
36+
required_capability: join_lookup_v4
3737

3838
FROM employees
3939
| SORT emp_no
@@ -49,9 +49,8 @@ emp_no:integer | language_code:integer | language_name:keyword
4949
10003 | 4 | German
5050
;
5151

52-
//TODO: this sometimes returns null instead of the looked up value (likely related to the execution order)
53-
subsequentEvalOnTheDataNode-Ignore
54-
required_capability: join_lookup_v3
52+
subsequentEvalOnTheDataNode
53+
required_capability: join_lookup_v4
5554

5655
FROM employees
5756
| EVAL language_code = languages
@@ -69,7 +68,7 @@ emp_no:integer | language_code:integer | language_name:keyword | language_code_x
6968
;
7069

7170
subsequentEvalOnTheCoordinator
72-
required_capability: join_lookup_v3
71+
required_capability: join_lookup_v4
7372

7473
FROM employees
7574
| SORT emp_no
@@ -85,3 +84,208 @@ emp_no:integer | language_code:integer | language_name:keyword | language_code_x
8584
10002 | 5 | null | 10
8685
10003 | 4 | german | 8
8786
;
87+
88+
lookupIPFromRow
89+
required_capability: join_lookup_v4
90+
91+
ROW left = "left", client_ip = "172.21.0.5", right = "right"
92+
| LOOKUP JOIN clientips_lookup ON client_ip
93+
;
94+
95+
left:keyword | client_ip:keyword | right:keyword | env:keyword
96+
left | 172.21.0.5 | right | Development
97+
;
98+
99+
lookupIPFromRowWithShadowing
100+
required_capability: join_lookup_v4
101+
102+
ROW left = "left", client_ip = "172.21.0.5", env = "env", right = "right"
103+
| LOOKUP JOIN clientips_lookup ON client_ip
104+
;
105+
106+
left:keyword | client_ip:keyword | right:keyword | env:keyword
107+
left | 172.21.0.5 | right | Development
108+
;
109+
110+
lookupIPFromRowWithShadowingKeep
111+
required_capability: join_lookup_v4
112+
113+
ROW left = "left", client_ip = "172.21.0.5", env = "env", right = "right"
114+
| EVAL client_ip = client_ip::keyword
115+
| LOOKUP JOIN clientips_lookup ON client_ip
116+
| KEEP left, client_ip, right, env
117+
;
118+
119+
left:keyword | client_ip:keyword | right:keyword | env:keyword
120+
left | 172.21.0.5 | right | Development
121+
;
122+
123+
lookupIPFromIndex
124+
required_capability: join_lookup_v4
125+
126+
FROM sample_data
127+
| EVAL client_ip = client_ip::keyword
128+
| LOOKUP JOIN clientips_lookup ON client_ip
129+
;
130+
131+
@timestamp:date | event_duration:long | message:keyword | client_ip:keyword | env:keyword
132+
2023-10-23T13:55:01.543Z | 1756467 | Connected to 10.1.0.1 | 172.21.3.15 | Production
133+
2023-10-23T13:53:55.832Z | 5033755 | Connection error | 172.21.3.15 | Production
134+
2023-10-23T13:52:55.015Z | 8268153 | Connection error | 172.21.3.15 | Production
135+
2023-10-23T13:51:54.732Z | 725448 | Connection error | 172.21.3.15 | Production
136+
2023-10-23T13:33:34.937Z | 1232382 | Disconnected | 172.21.0.5 | Development
137+
2023-10-23T12:27:28.948Z | 2764889 | Connected to 10.1.0.2 | 172.21.2.113 | QA
138+
2023-10-23T12:15:03.360Z | 3450233 | Connected to 10.1.0.3 | 172.21.2.162 | QA
139+
;
140+
141+
lookupIPFromIndexKeep
142+
required_capability: join_lookup_v4
143+
144+
FROM sample_data
145+
| EVAL client_ip = client_ip::keyword
146+
| LOOKUP JOIN clientips_lookup ON client_ip
147+
| KEEP @timestamp, client_ip, event_duration, message, env
148+
;
149+
150+
@timestamp:date | client_ip:keyword | event_duration:long | message:keyword | env:keyword
151+
2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1 | Production
152+
2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error | Production
153+
2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error | Production
154+
2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error | Production
155+
2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected | Development
156+
2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2 | QA
157+
2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3 | QA
158+
;
159+
160+
lookupIPFromIndexStats
161+
required_capability: join_lookup_v4
162+
163+
FROM sample_data
164+
| EVAL client_ip = client_ip::keyword
165+
| LOOKUP JOIN clientips_lookup ON client_ip
166+
| STATS count = count(client_ip) BY env
167+
| SORT count DESC, env ASC
168+
;
169+
170+
count:long | env:keyword
171+
4 | Production
172+
2 | QA
173+
1 | Development
174+
;
175+
176+
lookupIPFromIndexStatsKeep
177+
required_capability: join_lookup_v4
178+
179+
FROM sample_data
180+
| EVAL client_ip = client_ip::keyword
181+
| LOOKUP JOIN clientips_lookup ON client_ip
182+
| KEEP client_ip, env
183+
| STATS count = count(client_ip) BY env
184+
| SORT count DESC, env ASC
185+
;
186+
187+
count:long | env:keyword
188+
4 | Production
189+
2 | QA
190+
1 | Development
191+
;
192+
193+
lookupMessageFromRow
194+
required_capability: join_lookup_v4
195+
196+
ROW left = "left", message = "Connected to 10.1.0.1", right = "right"
197+
| LOOKUP JOIN message_types_lookup ON message
198+
;
199+
200+
left:keyword | message:keyword | right:keyword | type:keyword
201+
left | Connected to 10.1.0.1 | right | Success
202+
;
203+
204+
lookupMessageFromRowWithShadowing
205+
required_capability: join_lookup_v4
206+
207+
ROW left = "left", message = "Connected to 10.1.0.1", type = "unknown", right = "right"
208+
| LOOKUP JOIN message_types_lookup ON message
209+
;
210+
211+
left:keyword | message:keyword | right:keyword | type:keyword
212+
left | Connected to 10.1.0.1 | right | Success
213+
;
214+
215+
lookupMessageFromRowWithShadowingKeep
216+
required_capability: join_lookup_v4
217+
218+
ROW left = "left", message = "Connected to 10.1.0.1", type = "unknown", right = "right"
219+
| LOOKUP JOIN message_types_lookup ON message
220+
| KEEP left, message, right, type
221+
;
222+
223+
left:keyword | message:keyword | right:keyword | type:keyword
224+
left | Connected to 10.1.0.1 | right | Success
225+
;
226+
227+
lookupMessageFromIndex
228+
required_capability: join_lookup_v4
229+
230+
FROM sample_data
231+
| LOOKUP JOIN message_types_lookup ON message
232+
;
233+
234+
@timestamp:date | client_ip:ip | event_duration:long | message:keyword | type:keyword
235+
2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1 | Success
236+
2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error | Error
237+
2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error | Error
238+
2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error | Error
239+
2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected | Disconnected
240+
2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2 | Success
241+
2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3 | Success
242+
;
243+
244+
lookupMessageFromIndexKeep
245+
required_capability: join_lookup_v4
246+
247+
FROM sample_data
248+
| LOOKUP JOIN message_types_lookup ON message
249+
| KEEP @timestamp, client_ip, event_duration, message, type
250+
;
251+
252+
@timestamp:date | client_ip:ip | event_duration:long | message:keyword | type:keyword
253+
2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1 | Success
254+
2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error | Error
255+
2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error | Error
256+
2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error | Error
257+
2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected | Disconnected
258+
2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2 | Success
259+
2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3 | Success
260+
;
261+
262+
lookupMessageFromIndexStats
263+
required_capability: join_lookup_v4
264+
265+
FROM sample_data
266+
| LOOKUP JOIN message_types_lookup ON message
267+
| STATS count = count(message) BY type
268+
| SORT count DESC, type ASC
269+
;
270+
271+
count:long | type:keyword
272+
3 | Error
273+
3 | Success
274+
1 | Disconnected
275+
;
276+
277+
lookupMessageFromIndexStatsKeep
278+
required_capability: join_lookup_v4
279+
280+
FROM sample_data
281+
| LOOKUP JOIN message_types_lookup ON message
282+
| KEEP message, type
283+
| STATS count = count(message) BY type
284+
| SORT count DESC, type ASC
285+
;
286+
287+
count:long | type:keyword
288+
3 | Error
289+
3 | Success
290+
1 | Disconnected
291+
;

0 commit comments

Comments
 (0)