Skip to content

Commit b0fce46

Browse files
authored
ESQL: tests for LOOKUP JOIN with non-unique join keys (#118471) (#118661)
Add a CSV dataset and tests for `LOOKUP JOIN` where the join keys are not unique. In particular, add tests that include multi-valued fields (MVs) and nulls to show how `LOOKUP JOIN` treats them.
1 parent bfb3165 commit b0fce46

File tree

3 files changed

+90
-1
lines changed

3 files changed

+90
-1
lines changed

x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,8 @@ public class CsvTestsDataLoader {
6262
private static final TestsDataset LANGUAGES = new TestsDataset("languages");
6363
private static final TestsDataset LANGUAGES_LOOKUP = LANGUAGES.withIndex("languages_lookup")
6464
.withSetting("languages_lookup-settings.json");
65+
private static final TestsDataset LANGUAGES_LOOKUP_NON_UNIQUE_KEY = LANGUAGES_LOOKUP.withIndex("languages_lookup_non_unique_key")
66+
.withData("languages_non_unique_key.csv");
6567
private static final TestsDataset ALERTS = new TestsDataset("alerts");
6668
private static final TestsDataset UL_LOGS = new TestsDataset("ul_logs");
6769
private static final TestsDataset SAMPLE_DATA = new TestsDataset("sample_data");
@@ -113,6 +115,7 @@ public class CsvTestsDataLoader {
113115
Map.entry(APPS_SHORT.indexName, APPS_SHORT),
114116
Map.entry(LANGUAGES.indexName, LANGUAGES),
115117
Map.entry(LANGUAGES_LOOKUP.indexName, LANGUAGES_LOOKUP),
118+
Map.entry(LANGUAGES_LOOKUP_NON_UNIQUE_KEY.indexName, LANGUAGES_LOOKUP_NON_UNIQUE_KEY),
116119
Map.entry(UL_LOGS.indexName, UL_LOGS),
117120
Map.entry(SAMPLE_DATA.indexName, SAMPLE_DATA),
118121
Map.entry(MV_SAMPLE_DATA.indexName, MV_SAMPLE_DATA),
languages_non_unique_key.csv

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
language_code:integer,language_name:keyword,country:keyword
2+
1,English,Canada
3+
1,English,
4+
1,,United Kingdom
5+
1,English,United States of America
6+
2,German,[Germany,Austria]
7+
2,German,Switzerland
8+
2,German,
9+
4,Quenya,
10+
5,,Atlantis

x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec

Lines changed: 77 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
// Reuses the sample dataset and commands from enrich.csv-spec
44
//
55

6-
//TODO: this sometimes returns null instead of the looked up value (likely related to the execution order)
76
basicOnTheDataNode
87
required_capability: join_lookup_v5
98

@@ -102,6 +101,83 @@ emp_no:integer | language_code:integer | language_name:keyword
102101
10003 | 4 | German
103102
;
104103

104+
nonUniqueLeftKeyOnTheDataNode
105+
required_capability: join_lookup_v5
106+
107+
FROM employees
108+
| WHERE emp_no <= 10030
109+
| EVAL language_code = emp_no % 10
110+
| WHERE language_code < 3
111+
| LOOKUP JOIN languages_lookup ON language_code
112+
| SORT emp_no
113+
| KEEP emp_no, language_code, language_name
114+
;
115+
116+
emp_no:integer | language_code:integer | language_name:keyword
117+
10001 |1 | English
118+
10002 |2 | French
119+
10010 |0 | null
120+
10011 |1 | English
121+
10012 |2 | French
122+
10020 |0 | null
123+
10021 |1 | English
124+
10022 |2 | French
125+
10030 |0 | null
126+
;
127+
128+
nonUniqueRightKeyOnTheDataNode
129+
required_capability: join_lookup_v5
130+
131+
FROM employees
132+
| EVAL language_code = emp_no % 10
133+
| LOOKUP JOIN languages_lookup_non_unique_key ON language_code
134+
| WHERE emp_no > 10090 AND emp_no < 10096
135+
| SORT emp_no
136+
| EVAL country = MV_SORT(country)
137+
| KEEP emp_no, language_code, language_name, country
138+
;
139+
140+
emp_no:integer | language_code:integer | language_name:keyword | country:keyword
141+
10091 | 1 | [English, English, English] | [Canada, United Kingdom, United States of America]
142+
10092 | 2 | [German, German, German] | [Austria, Germany, Switzerland]
143+
10093 | 3 | null | null
144+
10094 | 4 | Quenya | null
145+
10095 | 5 | null | Atlantis
146+
;
147+
148+
nonUniqueRightKeyOnTheCoordinator
149+
required_capability: join_lookup_v5
150+
151+
FROM employees
152+
| SORT emp_no
153+
| LIMIT 5
154+
| EVAL language_code = emp_no % 10
155+
| LOOKUP JOIN languages_lookup_non_unique_key ON language_code
156+
| EVAL country = MV_SORT(country)
157+
| KEEP emp_no, language_code, language_name, country
158+
;
159+
160+
emp_no:integer | language_code:integer | language_name:keyword | country:keyword
161+
10001 | 1 | [English, English, English] | [Canada, United Kingdom, United States of America]
162+
10002 | 2 | [German, German, German] | [Austria, Germany, Switzerland]
163+
10003 | 3 | null | null
164+
10004 | 4 | Quenya | null
165+
10005 | 5 | null | Atlantis
166+
;
167+
168+
nonUniqueRightKeyFromRow
169+
required_capability: join_lookup_v5
170+
171+
ROW language_code = 2
172+
| LOOKUP JOIN languages_lookup_non_unique_key ON language_code
173+
| DROP country.keyword
174+
| EVAL country = MV_SORT(country)
175+
;
176+
177+
language_code:integer | language_name:keyword | country:keyword
178+
2 | [German, German, German] | [Austria, Germany, Switzerland]
179+
;
180+
105181
lookupIPFromRow
106182
required_capability: join_lookup_v5
107183

0 commit comments

Comments
 (0)