Skip to content

Commit 048c423

Browse files
committed
Support much wider range of types and mixed types
1 parent a388663 commit 048c423

File tree

2 files changed

+123
-26
lines changed
  • x-pack/plugin/esql/src

2 files changed

+123
-26
lines changed

x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/LookupJoinTypesIT.java

Lines changed: 116 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
package org.elasticsearch.xpack.esql.action;
99

1010
import org.elasticsearch.common.settings.Settings;
11+
import org.elasticsearch.index.mapper.extras.MapperExtrasPlugin;
1112
import org.elasticsearch.plugins.Plugin;
1213
import org.elasticsearch.test.ESIntegTestCase;
1314
import org.elasticsearch.test.ESIntegTestCase.ClusterScope;
@@ -20,7 +21,7 @@
2021
import java.util.HashMap;
2122
import java.util.HashSet;
2223
import java.util.Iterator;
23-
import java.util.LinkedHashSet;
24+
import java.util.LinkedHashMap;
2425
import java.util.List;
2526
import java.util.Locale;
2627
import java.util.Map;
@@ -30,6 +31,17 @@
3031

3132
import static org.elasticsearch.test.ESIntegTestCase.Scope.SUITE;
3233
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
34+
import static org.elasticsearch.xpack.esql.core.type.DataType.BOOLEAN;
35+
import static org.elasticsearch.xpack.esql.core.type.DataType.BYTE;
36+
import static org.elasticsearch.xpack.esql.core.type.DataType.DOUBLE;
37+
import static org.elasticsearch.xpack.esql.core.type.DataType.FLOAT;
38+
import static org.elasticsearch.xpack.esql.core.type.DataType.HALF_FLOAT;
39+
import static org.elasticsearch.xpack.esql.core.type.DataType.INTEGER;
40+
import static org.elasticsearch.xpack.esql.core.type.DataType.IP;
41+
import static org.elasticsearch.xpack.esql.core.type.DataType.KEYWORD;
42+
import static org.elasticsearch.xpack.esql.core.type.DataType.LONG;
43+
import static org.elasticsearch.xpack.esql.core.type.DataType.SHORT;
44+
import static org.elasticsearch.xpack.esql.core.type.DataType.TEXT;
3345
import static org.hamcrest.Matchers.containsString;
3446
import static org.hamcrest.Matchers.equalTo;
3547
import static org.hamcrest.Matchers.is;
@@ -90,51 +102,92 @@
90102
@ClusterScope(scope = SUITE, numClientNodes = 1, numDataNodes = 1)
91103
public class LookupJoinTypesIT extends ESIntegTestCase {
92104
protected Collection<Class<? extends Plugin>> nodePlugins() {
93-
return List.of(EsqlPlugin.class);
105+
return List.of(EsqlPlugin.class, MapperExtrasPlugin.class);
94106
}
95107

96108
private static final Map<String, TestConfigs> testConfigurations = new HashMap<>();
97109
static {
98110
// Initialize the test configurations for string tests
99111
{
100-
TestConfigs configs = testConfigurations.computeIfAbsent("strings", k -> new TestConfigs(k, new LinkedHashSet<>()));
101-
configs.addPasses(DataType.KEYWORD, DataType.KEYWORD);
102-
configs.addPasses(DataType.TEXT, DataType.KEYWORD);
103-
configs.addFailsText(DataType.KEYWORD, DataType.TEXT);
104-
configs.addFailsText(DataType.TEXT, DataType.TEXT);
112+
TestConfigs configs = testConfigurations.computeIfAbsent("strings", TestConfigs::new);
113+
configs.addPasses(KEYWORD, KEYWORD);
114+
configs.addPasses(TEXT, KEYWORD);
115+
configs.addFailsText(KEYWORD, TEXT);
116+
configs.addFailsText(TEXT, TEXT);
105117
}
106118

107119
// Test integer types
120+
var integerTypes = List.of(BYTE, SHORT, INTEGER);
108121
{
109-
TestConfigs configs = testConfigurations.computeIfAbsent("integers", k -> new TestConfigs(k, new LinkedHashSet<>()));
110-
var integerTypes = List.of(DataType.BYTE, DataType.SHORT, DataType.INTEGER);
122+
TestConfigs configs = testConfigurations.computeIfAbsent("integers", TestConfigs::new);
111123
for (DataType mainType : integerTypes) {
112124
for (DataType lookupType : integerTypes) {
113125
configs.addPasses(mainType, lookupType);
114126
}
115127
// Long is currently treated differently in the validation, but we could consider changing that
116-
configs.addFails(mainType, DataType.LONG);
117-
configs.addFails(DataType.LONG, mainType);
128+
configs.addFails(mainType, LONG);
129+
configs.addFails(LONG, mainType);
118130
}
119131
}
120132

121133
// Test float and double
134+
var floatTypes = List.of(HALF_FLOAT, FLOAT, DOUBLE);
122135
{
123-
TestConfigs configs = testConfigurations.computeIfAbsent("floats", k -> new TestConfigs(k, new LinkedHashSet<>()));
124-
var floatTypes = List.of(DataType.FLOAT, DataType.DOUBLE);
136+
TestConfigs configs = testConfigurations.computeIfAbsent("floats", TestConfigs::new);
125137
for (DataType mainType : floatTypes) {
126138
for (DataType lookupType : floatTypes) {
127139
configs.addPasses(mainType, lookupType);
128140
}
129141
}
130142
}
131143

144+
// Tests for mixed-numerical types
145+
{
146+
TestConfigs configs = testConfigurations.computeIfAbsent("mixed-numerical", TestConfigs::new);
147+
for (DataType mainType : integerTypes) {
148+
for (DataType lookupType : floatTypes) {
149+
// TODO: We should probably allow this, but we need to change the validation code in Join.java
150+
configs.addFails(mainType, lookupType);
151+
configs.addFails(lookupType, mainType);
152+
}
153+
}
154+
}
155+
156+
// Tests for all types where left and right are the same type
157+
// DataType[] all = { BOOLEAN, LONG, INTEGER, DOUBLE, SHORT, BYTE, FLOAT, HALF_FLOAT, DATETIME, DATE_NANOS, IP, KEYWORD };
158+
DataType[] all = { BOOLEAN, LONG, INTEGER, DOUBLE, SHORT, BYTE, FLOAT, HALF_FLOAT, IP, KEYWORD };
159+
{
160+
Collection<TestConfigs> existing = testConfigurations.values();
161+
TestConfigs configs = testConfigurations.computeIfAbsent("same", TestConfigs::new);
162+
for (DataType type : all) {
163+
if (existingIndex(existing, type, type)) {
164+
// Skip existing configurations
165+
continue;
166+
}
167+
configs.addPasses(type, type);
168+
}
169+
}
170+
171+
// Tests for all other type combinations
172+
{
173+
Collection<TestConfigs> existing = testConfigurations.values();
174+
TestConfigs configs = testConfigurations.computeIfAbsent("others", TestConfigs::new);
175+
for (DataType mainType : all) {
176+
for (DataType lookupType : all) {
177+
if (existingIndex(existing, mainType, lookupType)) {
178+
// Skip existing configurations
179+
continue;
180+
}
181+
configs.addFails(mainType, lookupType);
182+
}
183+
}
184+
}
132185
// TODO: Add tests for mixed groups (should mostly fail, but might be some implicit casting to consider)
133186

134187
// Make sure we have never added two configurations with the same index name
135188
Set<String> knownTypes = new HashSet<>();
136189
for (TestConfigs configs : testConfigurations.values()) {
137-
for (TestConfig config : configs.configs()) {
190+
for (TestConfig config : configs.configs.values()) {
138191
if (knownTypes.contains(config.indexName())) {
139192
throw new IllegalArgumentException("Duplicate index name: " + config.indexName());
140193
}
@@ -143,6 +196,11 @@ protected Collection<Class<? extends Plugin>> nodePlugins() {
143196
}
144197
}
145198

199+
private static boolean existingIndex(Collection<TestConfigs> existing, DataType mainType, DataType lookupType) {
200+
String indexName = "index_" + mainType.esType() + "_" + lookupType.esType();
201+
return existing.stream().anyMatch(c -> c.exists(indexName));
202+
}
203+
146204
public void testLookupJoinStrings() {
147205
testLookupJoinTypes("strings");
148206
}
@@ -155,10 +213,22 @@ public void testLookupJoinFloats() {
155213
testLookupJoinTypes("floats");
156214
}
157215

216+
public void testLookupJoinMixedNumerical() {
217+
testLookupJoinTypes("mixed-numerical");
218+
}
219+
220+
public void testLookupJoinSame() {
221+
testLookupJoinTypes("same");
222+
}
223+
224+
public void testLookupJoinOthers() {
225+
testLookupJoinTypes("others");
226+
}
227+
158228
private void testLookupJoinTypes(String group) {
159229
initIndexes(group);
160230
initData(group);
161-
for (TestConfig config : testConfigurations.get(group).configs()) {
231+
for (TestConfig config : testConfigurations.get(group).configs.values()) {
162232
String query = String.format(
163233
Locale.ROOT,
164234
"FROM index | LOOKUP JOIN %s ON %s | KEEP other",
@@ -172,7 +242,7 @@ private void testLookupJoinTypes(String group) {
172242
}
173243

174244
private void initIndexes(String group) {
175-
Set<TestConfig> configs = testConfigurations.get(group).configs;
245+
Collection<TestConfig> configs = testConfigurations.get(group).configs.values();
176246
// The main index will have many fields, one of each type to use in later type specific joins
177247
String mainFields = "{\n \"properties\" : {\n"
178248
+ configs.stream().map(TestConfig::mainPropertySpec).distinct().collect(Collectors.joining(",\n "))
@@ -193,7 +263,7 @@ private void initIndexes(String group) {
193263
}
194264

195265
private void initData(String group) {
196-
Set<TestConfig> configs = testConfigurations.get(group).configs;
266+
Collection<TestConfig> configs = testConfigurations.get(group).configs.values();
197267
int docId = 0;
198268
for (TestConfig config : configs) {
199269
String doc = String.format(Locale.ROOT, """
@@ -224,29 +294,50 @@ private String mainPropertyFor(TestConfig config) {
224294

225295
private static String sampleDataTextFor(DataType type) {
226296
return switch (type) {
227-
case KEYWORD, TEXT -> "\"" + sampleDataFor(type) + "\"";
297+
case KEYWORD, TEXT, DATETIME, DATE_NANOS, IP -> "\"" + sampleDataFor(type) + "\"";
228298
default -> String.valueOf(sampleDataFor(type));
229299
};
230300
}
231301

232302
private static Object sampleDataFor(DataType type) {
233303
return switch (type) {
304+
case BOOLEAN -> true;
305+
case DATETIME, DATE_NANOS -> "2025-04-02T12:00:00.000Z";
306+
case IP -> "127.0.0.1";
234307
case KEYWORD, TEXT -> "key";
235308
case BYTE, SHORT, INTEGER -> 1;
236309
case LONG -> 1L;
237-
case FLOAT, DOUBLE -> 1.0;
310+
case HALF_FLOAT, FLOAT, DOUBLE -> 1.0;
238311
default -> throw new IllegalArgumentException("Unsupported type: " + type);
239312
};
240313
}
241314

242-
private record TestConfigs(String group, Set<TestConfig> configs) {
315+
private static class TestConfigs {
316+
final String group;
317+
final Map<String, TestConfig> configs;
318+
319+
TestConfigs(String group) {
320+
this.group = group;
321+
this.configs = new LinkedHashMap<>();
322+
}
323+
324+
private boolean exists(String indexName) {
325+
return configs.containsKey(indexName);
326+
}
327+
328+
private void add(TestConfig config) {
329+
if (configs.containsKey(config.indexName())) {
330+
throw new IllegalArgumentException("Duplicate index name: " + config.indexName());
331+
}
332+
configs.put(config.indexName(), config);
333+
}
243334

244335
private void addPasses(DataType mainType, DataType lookupType) {
245-
configs.add(new TestConfigPasses(mainType, lookupType, true));
336+
add(new TestConfigPasses(mainType, lookupType, true));
246337
}
247338

248339
private void addEmptyResult(DataType mainType, DataType lookupType) {
249-
configs.add(new TestConfigPasses(mainType, lookupType, false));
340+
add(new TestConfigPasses(mainType, lookupType, false));
250341
}
251342

252343
private void addFails(DataType mainType, DataType lookupType) {
@@ -259,7 +350,7 @@ private void addFails(DataType mainType, DataType lookupType) {
259350
fieldName,
260351
lookupType.widenSmallNumeric()
261352
);
262-
configs.add(
353+
add(
263354
new TestConfigFails<>(
264355
mainType,
265356
lookupType,
@@ -272,7 +363,7 @@ private void addFails(DataType mainType, DataType lookupType) {
272363
private void addFailsText(DataType mainType, DataType lookupType) {
273364
String fieldName = "field_" + mainType.esType();
274365
String errorMessage = String.format(Locale.ROOT, "JOIN with right field [%s] of type [TEXT] is not supported", fieldName);
275-
configs.add(
366+
add(
276367
new TestConfigFails<>(
277368
mainType,
278369
lookupType,
@@ -283,7 +374,7 @@ private void addFailsText(DataType mainType, DataType lookupType) {
283374
}
284375

285376
private <E extends Exception> void addFails(DataType mainType, DataType lookupType, Class<E> exception, Consumer<E> assertion) {
286-
configs.add(new TestConfigFails<>(mainType, lookupType, exception, assertion));
377+
add(new TestConfigFails<>(mainType, lookupType, exception, assertion));
287378
}
288379
}
289380

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/join/Join.java

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,7 @@ public void postAnalysisVerification(Failures failures) {
217217
for (int i = 0; i < config.leftFields().size(); i++) {
218218
Attribute leftField = config.leftFields().get(i);
219219
Attribute rightField = config.rightFields().get(i);
220-
if (leftField.dataType().noText() != rightField.dataType().noText()) {
220+
if (comparableTypes(leftField, rightField) == false) {
221221
failures.add(
222222
fail(
223223
leftField,
@@ -236,4 +236,10 @@ public void postAnalysisVerification(Failures failures) {
236236
}
237237
}
238238
}
239+
240+
private static boolean comparableTypes(Attribute left, Attribute right) {
241+
// TODO: Consider allowing more valid types
242+
// return left.dataType().noText() == right.dataType().noText() || left.dataType().isNumeric() == right.dataType().isNumeric();
243+
return left.dataType().noText() == right.dataType().noText();
244+
}
239245
}

0 commit comments

Comments
 (0)