diff --git a/docs/changelog/127962.yaml b/docs/changelog/127962.yaml new file mode 100644 index 0000000000000..f96b79a69d5b3 --- /dev/null +++ b/docs/changelog/127962.yaml @@ -0,0 +1,6 @@ +pr: 127962 +summary: Support DATE_NANOS in LOOKUP JOIN +area: ES|QL +type: bug +issues: + - 127249 diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DateFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/DateFieldMapper.java index 5ddaff8b961bb..a102f6c8e55ad 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DateFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DateFieldMapper.java @@ -473,19 +473,12 @@ public DateFieldType(String name) { this(name, true, true, false, true, DEFAULT_DATE_TIME_FORMATTER, Resolution.MILLISECONDS, null, null, Collections.emptyMap()); } + public DateFieldType(String name, boolean isIndexed, Resolution resolution) { + this(name, isIndexed, isIndexed, false, true, DEFAULT_DATE_TIME_FORMATTER, resolution, null, null, Collections.emptyMap()); + } + public DateFieldType(String name, boolean isIndexed) { - this( - name, - isIndexed, - isIndexed, - false, - true, - DEFAULT_DATE_TIME_FORMATTER, - Resolution.MILLISECONDS, - null, - null, - Collections.emptyMap() - ); + this(name, isIndexed, Resolution.MILLISECONDS); } public DateFieldType(String name, DateFormatter dateFormatter) { @@ -699,6 +692,54 @@ public static long parseToLong( return resolution.convert(dateParser.parse(BytesRefs.toString(value), now, roundUp, zone)); } + /** + * Similar to the {@link DateFieldType#termQuery} method, but works on dates that are already parsed to a long + * in the same precision as the field mapper. + */ + public Query equalityQuery(Long value, @Nullable SearchExecutionContext context) { + return rangeQuery(value, value, true, true, context); + } + + /** + * Similar to the existing + * {@link DateFieldType#rangeQuery(Object, Object, boolean, boolean, ShapeRelation, ZoneId, DateMathParser, SearchExecutionContext)} + * method, but works on dates that are already parsed to a long in the same precision as the field mapper. + */ + public Query rangeQuery( + Long lowerTerm, + Long upperTerm, + boolean includeLower, + boolean includeUpper, + SearchExecutionContext context + ) { + failIfNotIndexedNorDocValuesFallback(context); + long l, u; + if (lowerTerm == null) { + l = Long.MIN_VALUE; + } else { + l = (includeLower == false) ? lowerTerm + 1 : lowerTerm; + } + if (upperTerm == null) { + u = Long.MAX_VALUE; + } else { + u = (includeUpper == false) ? 
upperTerm - 1 : upperTerm; + } + Query query; + if (isIndexed()) { + query = LongPoint.newRangeQuery(name(), l, u); + if (hasDocValues()) { + Query dvQuery = SortedNumericDocValuesField.newSlowRangeQuery(name(), l, u); + query = new IndexOrDocValuesQuery(query, dvQuery); + } + } else { + query = SortedNumericDocValuesField.newSlowRangeQuery(name(), l, u); + } + if (hasDocValues() && context.indexSortedOnField(name())) { + query = new IndexSortSortedNumericDocValuesRangeQuery(name(), l, u, query); + } + return query; + } + @Override public Query distanceFeatureQuery(Object origin, String pivot, SearchExecutionContext context) { failIfNotIndexedNorDocValuesFallback(context); diff --git a/server/src/test/java/org/elasticsearch/index/mapper/DateFieldTypeTests.java b/server/src/test/java/org/elasticsearch/index/mapper/DateFieldTypeTests.java index d925a9dd1d691..2f3781f63e5ee 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/DateFieldTypeTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/DateFieldTypeTests.java @@ -208,13 +208,11 @@ public void testValueForSearch() { assertEquals(date, ft.valueForDisplay(instant)); } + /** + * If the term field is a string of date-time format with exact seconds (no sub-seconds), any data within a 1second range will match. + */ public void testTermQuery() { - Settings indexSettings = indexSettings(IndexVersion.current(), 1, 1).build(); - SearchExecutionContext context = SearchExecutionContextHelper.createSimple( - new IndexSettings(IndexMetadata.builder("foo").settings(indexSettings).build(), indexSettings), - parserConfig(), - writableRegistry() - ); + SearchExecutionContext context = prepareIndexForTermQuery(); MappedFieldType ft = new DateFieldType("field"); String date = "2015-10-12T14:10:55"; long instant = DateFormatters.from(DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parse(date)).toInstant().toEpochMilli(); @@ -228,45 +226,99 @@ public void testTermQuery() { expected = SortedNumericDocValuesField.newSlowRangeQuery("field", instant, instant + 999); assertEquals(expected, ft.termQuery(date, context)); - MappedFieldType unsearchable = new DateFieldType( - "field", - false, - false, - false, - DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER, - Resolution.MILLISECONDS, - null, - null, - Collections.emptyMap() + assertIndexUnsearchable(Resolution.MILLISECONDS, (unsearchable) -> unsearchable.termQuery(date, context)); + } + + /** + * If the term field is a string of date-time format with sub-seconds, only data with exact ms precision will match. 
+ */ + public void testTermQuerySubseconds() { + SearchExecutionContext context = prepareIndexForTermQuery(); + MappedFieldType ft = new DateFieldType("field"); + String date = "2015-10-12T14:10:55.01"; + long instant = DateFormatters.from(DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parse(date)).toInstant().toEpochMilli(); + Query expected = new IndexOrDocValuesQuery( + LongPoint.newRangeQuery("field", instant, instant), + SortedNumericDocValuesField.newSlowRangeQuery("field", instant, instant) ); - IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> unsearchable.termQuery(date, context)); - assertEquals("Cannot search on field [field] since it is not indexed nor has doc values.", e.getMessage()); + assertEquals(expected, ft.termQuery(date, context)); + + ft = new DateFieldType("field", false); + expected = SortedNumericDocValuesField.newSlowRangeQuery("field", instant, instant); + assertEquals(expected, ft.termQuery(date, context)); + + assertIndexUnsearchable(Resolution.MILLISECONDS, (unsearchable) -> unsearchable.termQuery(date, context)); } - public void testRangeQuery() throws IOException { - Settings indexSettings = indexSettings(IndexVersion.current(), 1, 1).build(); - SearchExecutionContext context = new SearchExecutionContext( - 0, - 0, - new IndexSettings(IndexMetadata.builder("foo").settings(indexSettings).build(), indexSettings), - null, - null, - null, - MappingLookup.EMPTY, - null, - null, - parserConfig(), - writableRegistry(), - null, - null, - () -> nowInMillis, - null, - null, - () -> true, - null, - Collections.emptyMap(), - MapperMetrics.NOOP + /** + * If the term field is a string of the long value (ms since epoch), only data with exact ms precision will match. + */ + public void testTermQueryMillis() { + SearchExecutionContext context = prepareIndexForTermQuery(); + MappedFieldType ft = new DateFieldType("field"); + String date = "2015-10-12T14:10:55"; + long instant = DateFormatters.from(DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parse(date)).toInstant().toEpochMilli(); + Query expected = new IndexOrDocValuesQuery( + LongPoint.newRangeQuery("field", instant, instant), + SortedNumericDocValuesField.newSlowRangeQuery("field", instant, instant) ); + assertEquals(expected, ft.termQuery(instant, context)); + + ft = new DateFieldType("field", false); + expected = SortedNumericDocValuesField.newSlowRangeQuery("field", instant, instant); + assertEquals(expected, ft.termQuery(instant, context)); + + assertIndexUnsearchable(Resolution.MILLISECONDS, (unsearchable) -> unsearchable.termQuery(instant, context)); + } + + /** + * This query has similar behaviour to passing a String containing a long to termQuery, only data with exact ms precision will match. 
+ */ + public void testEqualityQuery() { + SearchExecutionContext context = prepareIndexForTermQuery(); + DateFieldType ft = new DateFieldType("field"); + String date = "2015-10-12T14:10:55"; + long instant = DateFormatters.from(DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parse(date)).toInstant().toEpochMilli(); + Query expected = new IndexOrDocValuesQuery( + LongPoint.newRangeQuery("field", instant, instant), + SortedNumericDocValuesField.newSlowRangeQuery("field", instant, instant) + ); + assertEquals(expected, ft.equalityQuery(instant, context)); + + ft = new DateFieldType("field", false); + expected = SortedNumericDocValuesField.newSlowRangeQuery("field", instant, instant); + assertEquals(expected, ft.equalityQuery(instant, context)); + + assertIndexUnsearchable(Resolution.MILLISECONDS, (unsearchable) -> unsearchable.equalityQuery(instant, context)); + } + + /** + * This query supports passing a ns value, and only data with exact ns precision will match. + */ + public void testEqualityNanosQuery() { + SearchExecutionContext context = prepareIndexForTermQuery(); + DateFieldType ft = new DateFieldType("field", Resolution.NANOSECONDS); + String date = "2015-10-12T14:10:55"; + long instant = DateFormatters.from(DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parse(date)).toInstant().toEpochMilli() * 1000000L; + Query expected = new IndexOrDocValuesQuery( + LongPoint.newRangeQuery("field", instant, instant), + SortedNumericDocValuesField.newSlowRangeQuery("field", instant, instant) + ); + assertEquals(expected, ft.equalityQuery(instant, context)); + + ft = new DateFieldType("field", false); + expected = SortedNumericDocValuesField.newSlowRangeQuery("field", instant, instant); + assertEquals(expected, ft.equalityQuery(instant, context)); + + assertIndexUnsearchable(Resolution.NANOSECONDS, (unsearchable) -> unsearchable.equalityQuery(instant, context)); + } + + /** + * If the term fields are strings of date-time format with exact seconds (no sub-seconds), + * the second field will be rounded up to the next second. + */ + public void testRangeQuery() throws IOException { + SearchExecutionContext context = prepareIndexForRangeQuery(); MappedFieldType ft = new DateFieldType("field"); String date1 = "2015-10-12T14:10:55"; String date2 = "2016-04-28T11:33:52"; @@ -298,22 +350,105 @@ public void testRangeQuery() throws IOException { expected2 = new DateRangeIncludingNowQuery(SortedNumericDocValuesField.newSlowRangeQuery("field", instant1, instant2)); assertEquals(expected2, ft2.rangeQuery("now", instant2, true, true, null, null, null, context)); - MappedFieldType unsearchable = new DateFieldType( - "field", - false, - false, - false, - DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER, + assertIndexUnsearchable( Resolution.MILLISECONDS, - null, - null, - Collections.emptyMap() + (unsearchable) -> unsearchable.rangeQuery(date1, date2, true, true, null, null, null, context) ); - IllegalArgumentException e = expectThrows( - IllegalArgumentException.class, - () -> unsearchable.rangeQuery(date1, date2, true, true, null, null, null, context) + } + + /** + * If the term fields are strings of date-time format with sub-seconds, + * the lower and upper values will be matched inclusively to the ms. 
+ */ + public void testRangeQuerySubseconds() throws IOException { + SearchExecutionContext context = prepareIndexForRangeQuery(); + MappedFieldType ft = new DateFieldType("field"); + String date1 = "2015-10-12T14:10:55.01"; + String date2 = "2016-04-28T11:33:52.01"; + long instant1 = DateFormatters.from(DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parse(date1)).toInstant().toEpochMilli(); + long instant2 = DateFormatters.from(DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parse(date2)).toInstant().toEpochMilli(); + Query expected = new IndexOrDocValuesQuery( + LongPoint.newRangeQuery("field", instant1, instant2), + SortedNumericDocValuesField.newSlowRangeQuery("field", instant1, instant2) + ); + assertEquals(expected, ft.rangeQuery(date1, date2, true, true, null, null, null, context).rewrite(newSearcher(new MultiReader()))); + + MappedFieldType ft2 = new DateFieldType("field", false); + Query expected2 = SortedNumericDocValuesField.newSlowRangeQuery("field", instant1, instant2); + assertEquals( + expected2, + ft2.rangeQuery(date1, date2, true, true, null, null, null, context).rewrite(newSearcher(new MultiReader())) + ); + + instant1 = nowInMillis; + instant2 = instant1 + 100; + expected = new DateRangeIncludingNowQuery( + new IndexOrDocValuesQuery( + LongPoint.newRangeQuery("field", instant1, instant2), + SortedNumericDocValuesField.newSlowRangeQuery("field", instant1, instant2) + ) + ); + assertEquals(expected, ft.rangeQuery("now", instant2, true, true, null, null, null, context)); + + expected2 = new DateRangeIncludingNowQuery(SortedNumericDocValuesField.newSlowRangeQuery("field", instant1, instant2)); + assertEquals(expected2, ft2.rangeQuery("now", instant2, true, true, null, null, null, context)); + + assertIndexUnsearchable( + Resolution.MILLISECONDS, + (unsearchable) -> unsearchable.rangeQuery(date1, date2, true, true, null, null, null, context) ); - assertEquals("Cannot search on field [field] since it is not indexed nor has doc values.", e.getMessage()); + } + + /** + * If the term fields are strings of long ms, the lower and upper values will be matched inclusively to the ms. + */ + public void testRangeQueryMillis() throws IOException { + SearchExecutionContext context = prepareIndexForRangeQuery(); + DateFieldType ft = new DateFieldType("field"); + String date1 = "2015-10-12T14:10:55.01"; + String date2 = "2016-04-28T11:33:52.01"; + long instant1 = DateFormatters.from(DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parse(date1)).toInstant().toEpochMilli(); + long instant2 = DateFormatters.from(DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parse(date2)).toInstant().toEpochMilli(); + Query expected = new IndexOrDocValuesQuery( + LongPoint.newRangeQuery("field", instant1, instant2), + SortedNumericDocValuesField.newSlowRangeQuery("field", instant1, instant2) + ); + assertEquals(expected, ft.rangeQuery(instant1, instant2, true, true, context).rewrite(newSearcher(new MultiReader()))); + + DateFieldType ft2 = new DateFieldType("field", false); + Query expected2 = SortedNumericDocValuesField.newSlowRangeQuery("field", instant1, instant2); + assertEquals(expected2, ft2.rangeQuery(instant1, instant2, true, true, context).rewrite(newSearcher(new MultiReader()))); + + assertIndexUnsearchable( + Resolution.MILLISECONDS, + (unsearchable) -> unsearchable.rangeQuery(instant1, instant2, true, true, context) + ); + } + + /** + * If the term fields are strings of long ns, the lower and upper values will be matched inclusively to the ns. 
+ */ + public void testRangeQueryNanos() throws IOException { + SearchExecutionContext context = prepareIndexForRangeQuery(); + DateFieldType ft = new DateFieldType("field", Resolution.NANOSECONDS); + String date1 = "2015-10-12T14:10:55.01"; + String date2 = "2016-04-28T11:33:52.01"; + long instant1 = DateFormatters.from(DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parse(date1)).toInstant().toEpochMilli() * 1000000L; + long instant2 = DateFormatters.from(DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parse(date2)).toInstant().toEpochMilli() * 1000000L; + Query expected = new IndexOrDocValuesQuery( + LongPoint.newRangeQuery("field", instant1, instant2), + SortedNumericDocValuesField.newSlowRangeQuery("field", instant1, instant2) + ); + assertEquals(expected, ft.rangeQuery(instant1, instant2, true, true, context).rewrite(newSearcher(new MultiReader()))); + + DateFieldType ft2 = new DateFieldType("field", false, Resolution.NANOSECONDS); + Query expected2 = SortedNumericDocValuesField.newSlowRangeQuery("field", instant1, instant2); + assertEquals( + expected2, + ft2.rangeQuery(date1, date2, true, true, null, null, null, context).rewrite(newSearcher(new MultiReader())) + ); + + assertIndexUnsearchable(Resolution.NANOSECONDS, (unsearchable) -> unsearchable.rangeQuery(instant1, instant2, true, true, context)); } public void testRangeQueryWithIndexSort() { @@ -420,4 +555,55 @@ public void testParseSourceValueNanos() throws IOException { MappedFieldType nullValueMapper = fieldType(Resolution.NANOSECONDS, "strict_date_time||epoch_millis", nullValueDate); assertEquals(List.of(nullValueDate), fetchSourceValue(nullValueMapper, null)); } + + private SearchExecutionContext prepareIndexForTermQuery() { + Settings indexSettings = indexSettings(IndexVersion.current(), 1, 1).build(); + return SearchExecutionContextHelper.createSimple( + new IndexSettings(IndexMetadata.builder("foo").settings(indexSettings).build(), indexSettings), + parserConfig(), + writableRegistry() + ); + } + + private SearchExecutionContext prepareIndexForRangeQuery() { + Settings indexSettings = indexSettings(IndexVersion.current(), 1, 1).build(); + return new SearchExecutionContext( + 0, + 0, + new IndexSettings(IndexMetadata.builder("foo").settings(indexSettings).build(), indexSettings), + null, + null, + null, + MappingLookup.EMPTY, + null, + null, + parserConfig(), + writableRegistry(), + null, + null, + () -> nowInMillis, + null, + null, + () -> true, + null, + Collections.emptyMap(), + MapperMetrics.NOOP + ); + } + + private void assertIndexUnsearchable(Resolution resolution, ThrowingConsumer runnable) { + DateFieldType unsearchable = new DateFieldType( + "field", + false, + false, + false, + DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER, + resolution, + null, + null, + Collections.emptyMap() + ); + IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> runnable.accept(unsearchable)); + assertEquals("Cannot search on field [field] since it is not indexed nor has doc values.", e.getMessage()); + } } diff --git a/x-pack/plugin/esql/build.gradle b/x-pack/plugin/esql/build.gradle index 2310de66ae86b..a0793635dc315 100644 --- a/x-pack/plugin/esql/build.gradle +++ b/x-pack/plugin/esql/build.gradle @@ -46,6 +46,7 @@ dependencies { testImplementation project(path: xpackModule('enrich')) testImplementation project(path: xpackModule('spatial')) testImplementation project(path: xpackModule('kql')) + testImplementation project(path: xpackModule('mapper-unsigned-long')) testImplementation project(path: ':modules:reindex') 
testImplementation project(path: ':modules:parent-join') diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/lookup/QueryList.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/lookup/QueryList.java index 5d359e2fb612f..d6fb6e4bfcdec 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/lookup/QueryList.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/lookup/QueryList.java @@ -11,6 +11,7 @@ import org.apache.lucene.geo.GeoEncodingUtils; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.ConstantScoreQuery; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.Query; import org.apache.lucene.util.BytesRef; @@ -30,6 +31,7 @@ import org.elasticsearch.geometry.Point; import org.elasticsearch.geometry.utils.GeometryValidator; import org.elasticsearch.geometry.utils.WellKnownBinary; +import org.elasticsearch.index.mapper.DateFieldMapper; import org.elasticsearch.index.mapper.GeoShapeQueryable; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.RangeFieldMapper; @@ -37,7 +39,9 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.HashSet; import java.util.List; +import java.util.Set; import java.util.function.IntFunction; /** @@ -189,6 +193,14 @@ public static QueryList dateTermQueryList(MappedFieldType field, SearchExecution ); } + /** + * Returns a list of term queries for the given field and the input block of + * {@code date_nanos} field values. + */ + public static QueryList dateNanosTermQueryList(MappedFieldType field, SearchExecutionContext searchExecutionContext, LongBlock block) { + return new DateNanosQueryList(field, searchExecutionContext, block, false); + } + /** * Returns a list of geo_shape queries for the given field and the input block. 
*/ @@ -232,6 +244,68 @@ Query doGetQuery(int position, int firstValueIndex, int valueCount) { } } + private static class DateNanosQueryList extends QueryList { + protected final IntFunction blockValueReader; + private final DateFieldMapper.DateFieldType dateFieldType; + + private DateNanosQueryList( + MappedFieldType field, + SearchExecutionContext searchExecutionContext, + LongBlock block, + boolean onlySingleValues + ) { + super(field, searchExecutionContext, block, onlySingleValues); + if (field instanceof RangeFieldMapper.RangeFieldType rangeFieldType) { + // TODO: do this validation earlier + throw new IllegalArgumentException( + "DateNanosQueryList does not support range fields [" + rangeFieldType + "]: " + field.name() + ); + } + this.blockValueReader = block::getLong; + if (field instanceof DateFieldMapper.DateFieldType dateFieldType) { + // Validate that the field is a date_nanos field + // TODO: Consider allowing date_nanos to match normal datetime fields + if (dateFieldType.resolution() != DateFieldMapper.Resolution.NANOSECONDS) { + throw new IllegalArgumentException( + "DateNanosQueryList only supports date_nanos fields, but got: " + field.typeName() + " for field: " + field.name() + ); + } + this.dateFieldType = dateFieldType; + } else { + throw new IllegalArgumentException( + "DateNanosQueryList only supports date_nanos fields, but got: " + field.typeName() + " for field: " + field.name() + ); + } + } + + @Override + public DateNanosQueryList onlySingleValues() { + return new DateNanosQueryList(field, searchExecutionContext, (LongBlock) block, true); + } + + @Override + Query doGetQuery(int position, int firstValueIndex, int valueCount) { + return switch (valueCount) { + case 0 -> null; + case 1 -> dateFieldType.equalityQuery(blockValueReader.apply(firstValueIndex), searchExecutionContext); + default -> { + // The following code is a slight simplification of the DateFieldMapper.termsQuery method + final Set values = new HashSet<>(valueCount); + BooleanQuery.Builder builder = new BooleanQuery.Builder(); + for (int i = 0; i < valueCount; i++) { + final Long value = blockValueReader.apply(firstValueIndex + i); + if (values.contains(value)) { + continue; // Skip duplicates + } + values.add(value); + builder.add(dateFieldType.equalityQuery(value, searchExecutionContext), BooleanClause.Occur.SHOULD); + } + yield new ConstantScoreQuery(builder.build()); + } + }; + } + } + private static class GeoShapeQueryList extends QueryList { private final BytesRef scratch = new BytesRef(); private final IntFunction blockValueReader; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvAssert.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvAssert.java index 4e31916d5328e..e409c18c69169 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvAssert.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvAssert.java @@ -278,7 +278,7 @@ private static void dataFailure( fail(description + System.lineSeparator() + describeFailures(dataFailures) + actual + expected); } - private static final int MAX_ROWS = 25; + private static final int MAX_ROWS = 50; private static String pipeTable( String description, diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java index 
8ded542ee30b9..eefcb3ab7581b 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java @@ -61,15 +61,14 @@ public class CsvTestsDataLoader { private static final TestDataset APPS = new TestDataset("apps"); private static final TestDataset APPS_SHORT = APPS.withIndex("apps_short").withTypeMapping(Map.of("id", "short")); private static final TestDataset LANGUAGES = new TestDataset("languages"); - private static final TestDataset LANGUAGES_LOOKUP = LANGUAGES.withIndex("languages_lookup") - .withSetting("languages_lookup-settings.json"); + private static final TestDataset LANGUAGES_LOOKUP = LANGUAGES.withIndex("languages_lookup").withSetting("lookup-settings.json"); private static final TestDataset LANGUAGES_LOOKUP_NON_UNIQUE_KEY = LANGUAGES_LOOKUP.withIndex("languages_lookup_non_unique_key") .withData("languages_non_unique_key.csv"); private static final TestDataset LANGUAGES_NESTED_FIELDS = new TestDataset( "languages_nested_fields", "mapping-languages_nested_fields.json", "languages_nested_fields.csv" - ).withSetting("languages_lookup-settings.json"); + ).withSetting("lookup-settings.json"); private static final TestDataset ALERTS = new TestDataset("alerts"); private static final TestDataset UL_LOGS = new TestDataset("ul_logs"); private static final TestDataset SAMPLE_DATA = new TestDataset("sample_data"); @@ -82,13 +81,14 @@ public class CsvTestsDataLoader { private static final TestDataset SAMPLE_DATA_TS_NANOS = SAMPLE_DATA.withIndex("sample_data_ts_nanos") .withData("sample_data_ts_nanos.csv") .withTypeMapping(Map.of("@timestamp", "date_nanos")); + private static final TestDataset LOOKUP_SAMPLE_DATA_TS_NANOS = SAMPLE_DATA_TS_NANOS.withIndex("lookup_sample_data_ts_nanos") + .withSetting("lookup-settings.json"); private static final TestDataset MISSING_IP_SAMPLE_DATA = new TestDataset("missing_ip_sample_data"); private static final TestDataset CLIENT_IPS = new TestDataset("clientips"); - private static final TestDataset CLIENT_IPS_LOOKUP = CLIENT_IPS.withIndex("clientips_lookup") - .withSetting("clientips_lookup-settings.json"); + private static final TestDataset CLIENT_IPS_LOOKUP = CLIENT_IPS.withIndex("clientips_lookup").withSetting("lookup-settings.json"); private static final TestDataset MESSAGE_TYPES = new TestDataset("message_types"); private static final TestDataset MESSAGE_TYPES_LOOKUP = MESSAGE_TYPES.withIndex("message_types_lookup") - .withSetting("message_types_lookup-settings.json"); + .withSetting("lookup-settings.json"); private static final TestDataset CLIENT_CIDR = new TestDataset("client_cidr"); private static final TestDataset AGES = new TestDataset("ages"); private static final TestDataset HEIGHTS = new TestDataset("heights"); @@ -133,6 +133,7 @@ public class CsvTestsDataLoader { Map.entry(SAMPLE_DATA_STR.indexName, SAMPLE_DATA_STR), Map.entry(SAMPLE_DATA_TS_LONG.indexName, SAMPLE_DATA_TS_LONG), Map.entry(SAMPLE_DATA_TS_NANOS.indexName, SAMPLE_DATA_TS_NANOS), + Map.entry(LOOKUP_SAMPLE_DATA_TS_NANOS.indexName, LOOKUP_SAMPLE_DATA_TS_NANOS), Map.entry(MISSING_IP_SAMPLE_DATA.indexName, MISSING_IP_SAMPLE_DATA), Map.entry(CLIENT_IPS.indexName, CLIENT_IPS), Map.entry(CLIENT_IPS_LOOKUP.indexName, CLIENT_IPS_LOOKUP), diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/languages_lookup-settings.json b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/languages_lookup-settings.json deleted file mode 
100644 index b73d1f9accf92..0000000000000 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/languages_lookup-settings.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "index": { - "mode": "lookup" - } -} diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec index a3c97b524d10a..f415c0b415efc 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec @@ -1544,3 +1544,27 @@ French Spanish German ; + +############################################### +# LOOKUP JOIN on date_nanos field +############################################### + +joinDateNanos +required_capability: join_lookup_v12 +required_capability: date_nanos_lookup_join + +FROM sample_data_ts_nanos +| LOOKUP JOIN lookup_sample_data_ts_nanos ON @timestamp +| KEEP @timestamp, client_ip, event_duration, message +| SORT @timestamp DESC +; + +@timestamp:date_nanos | client_ip:ip | event_duration:long | message:keyword +2023-10-23T13:55:01.543123456Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1 +2023-10-23T13:53:55.832123456Z | 172.21.3.15 | 5033755 | Connection error +2023-10-23T13:52:55.015123456Z | 172.21.3.15 | 8268153 | Connection error +2023-10-23T13:51:54.732123456Z | 172.21.3.15 | 725448 | Connection error +2023-10-23T13:33:34.937123456Z | 172.21.0.5 | 1232382 | Disconnected +2023-10-23T12:27:28.948123456Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2 +2023-10-23T12:15:03.360123456Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3 +; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/clientips_lookup-settings.json b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-settings.json similarity index 100% rename from x-pack/plugin/esql/qa/testFixtures/src/main/resources/clientips_lookup-settings.json rename to x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-settings.json diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/message_types_lookup-settings.json b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/message_types_lookup-settings.json deleted file mode 100644 index b73d1f9accf92..0000000000000 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/message_types_lookup-settings.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "index": { - "mode": "lookup" - } -} diff --git a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/LookupJoinTypesIT.java b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/LookupJoinTypesIT.java new file mode 100644 index 0000000000000..a8c90988bd461 --- /dev/null +++ b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/LookupJoinTypesIT.java @@ -0,0 +1,574 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.esql.action; + +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.mapper.extras.MapperExtrasPlugin; +import org.elasticsearch.plugins.Plugin; +import org.elasticsearch.test.ESIntegTestCase; +import org.elasticsearch.test.ESIntegTestCase.ClusterScope; +import org.elasticsearch.xpack.core.esql.action.ColumnInfo; +import org.elasticsearch.xpack.esql.VerificationException; +import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.plan.logical.join.Join; +import org.elasticsearch.xpack.esql.plugin.EsqlPlugin; +import org.elasticsearch.xpack.spatial.SpatialPlugin; +import org.elasticsearch.xpack.unsignedlong.UnsignedLongMapperPlugin; +import org.elasticsearch.xpack.versionfield.VersionFieldPlugin; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.Set; +import java.util.function.Consumer; +import java.util.stream.Collectors; + +import static org.elasticsearch.test.ESIntegTestCase.Scope.SUITE; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; +import static org.elasticsearch.xpack.esql.core.type.DataType.AGGREGATE_METRIC_DOUBLE; +import static org.elasticsearch.xpack.esql.core.type.DataType.BOOLEAN; +import static org.elasticsearch.xpack.esql.core.type.DataType.BYTE; +import static org.elasticsearch.xpack.esql.core.type.DataType.DATETIME; +import static org.elasticsearch.xpack.esql.core.type.DataType.DATE_NANOS; +import static org.elasticsearch.xpack.esql.core.type.DataType.DOC_DATA_TYPE; +import static org.elasticsearch.xpack.esql.core.type.DataType.DOUBLE; +import static org.elasticsearch.xpack.esql.core.type.DataType.FLOAT; +import static org.elasticsearch.xpack.esql.core.type.DataType.HALF_FLOAT; +import static org.elasticsearch.xpack.esql.core.type.DataType.INTEGER; +import static org.elasticsearch.xpack.esql.core.type.DataType.IP; +import static org.elasticsearch.xpack.esql.core.type.DataType.KEYWORD; +import static org.elasticsearch.xpack.esql.core.type.DataType.LONG; +import static org.elasticsearch.xpack.esql.core.type.DataType.NULL; +import static org.elasticsearch.xpack.esql.core.type.DataType.SCALED_FLOAT; +import static org.elasticsearch.xpack.esql.core.type.DataType.SEMANTIC_TEXT; +import static org.elasticsearch.xpack.esql.core.type.DataType.SHORT; +import static org.elasticsearch.xpack.esql.core.type.DataType.TEXT; +import static org.elasticsearch.xpack.esql.core.type.DataType.TSID_DATA_TYPE; +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.nullValue; + +/** + * This test suite tests the lookup join functionality in ESQL with various data types. + * For each pair of types being tested, it builds a main index called "index" containing a single document with as many fields as + * types being tested on the left of the pair, and then creates that many other lookup indexes, each with a single document containing + * exactly two fields: the field to join on, and a field to return. + * The assertion is that for valid combinations, the return result should exist, and for invalid combinations an exception should be thrown. 
+ * If no exception is thrown, and no result is returned, our validation rules are not aligned with the internal behaviour (ie. a bug). + * Since the `LOOKUP JOIN` command requires the match field name to be the same between the main index and the lookup index, + * we will have field names that correctly represent the type of the field in the main index, but not the type of the field + * in the lookup index. This can be confusing, but it is important to remember that the field names are not the same as the types. + * For example, if we are testing the pairs (double, double), (double, float), (float, double) and (float, float), + * we will create the following indexes: + *
+ * <dl>
+ *     <dt>index_double_double</dt>
+ *     <dd>Index containing a single document with a field of type 'double' like:
+ *     <pre>
+ *         {
+ *             "field_double": 1.0,  // this is mapped as type 'double'
+ *             "other": "value"
+ *         }
+ *     </pre>
+ *     </dd>
+ *     <dt>index_double_float</dt>
+ *     <dd>Index containing a single document with a field of type 'float' like:
+ *     <pre>
+ *         {
+ *             "field_double": 1.0,  // this is mapped as type 'float' (a float with the name of the main index field)
+ *             "other": "value"
+ *         }
+ *     </pre>
+ *     </dd>
+ *     <dt>index_float_double</dt>
+ *     <dd>Index containing a single document with a field of type 'double' like:
+ *     <pre>
+ *         {
+ *             "field_float": 1.0,  // this is mapped as type 'double' (a double with the name of the main index field)
+ *             "other": "value"
+ *         }
+ *     </pre>
+ *     </dd>
+ *     <dt>index_float_float</dt>
+ *     <dd>Index containing a single document with a field of type 'float' like:
+ *     <pre>
+ *         {
+ *             "field_float": 1.0,  // this is mapped as type 'float'
+ *             "other": "value"
+ *         }
+ *     </pre>
+ *     </dd>
+ *     <dt>index</dt>
+ *     <dd>Index containing a document like:
+ *     <pre>
+ *         {
+ *             "field_double": 1.0,  // this is mapped as type 'double'
+ *             "field_float": 1.0    // this is mapped as type 'float'
+ *         }
+ *     </pre>
+ *     </dd>
+ * </dl>
+ * Note that the lookup indexes have fields with a name that matches the type in the main index, and not the type actually used in the
+ * lookup index. Instead, the mapped type should be the type of the right-hand side of the pair being tested.
+ * Then we can run queries like:
+ * <pre>
+ *     FROM index | LOOKUP JOIN index_double_float ON field_double | KEEP other
+ * </pre>
+ * And assert that the result exists and is equal to "value". + */ +@ClusterScope(scope = SUITE, numClientNodes = 1, numDataNodes = 1) +public class LookupJoinTypesIT extends ESIntegTestCase { + protected Collection> nodePlugins() { + return List.of( + EsqlPlugin.class, + MapperExtrasPlugin.class, + VersionFieldPlugin.class, + UnsignedLongMapperPlugin.class, + SpatialPlugin.class + ); + } + + private static final Map testConfigurations = new HashMap<>(); + static { + // Initialize the test configurations for string tests + { + TestConfigs configs = testConfigurations.computeIfAbsent("strings", TestConfigs::new); + configs.addPasses(KEYWORD, KEYWORD); + configs.addPasses(TEXT, KEYWORD); + configs.addFailsUnsupported(KEYWORD, TEXT); + } + + // Test integer types + var integerTypes = List.of(BYTE, SHORT, INTEGER); + { + TestConfigs configs = testConfigurations.computeIfAbsent("integers", TestConfigs::new); + for (DataType mainType : integerTypes) { + for (DataType lookupType : integerTypes) { + configs.addPasses(mainType, lookupType); + } + // Long is currently treated differently in the validation, but we could consider changing that + configs.addFails(mainType, LONG); + configs.addFails(LONG, mainType); + } + } + + // Test float and double + var floatTypes = List.of(HALF_FLOAT, FLOAT, DOUBLE, SCALED_FLOAT); + { + TestConfigs configs = testConfigurations.computeIfAbsent("floats", TestConfigs::new); + for (DataType mainType : floatTypes) { + for (DataType lookupType : floatTypes) { + configs.addPasses(mainType, lookupType); + } + } + } + + // Tests for mixed-numerical types + { + TestConfigs configs = testConfigurations.computeIfAbsent("mixed-numerical", TestConfigs::new); + for (DataType mainType : integerTypes) { + for (DataType lookupType : floatTypes) { + // TODO: We should probably allow this, but we need to change the validation code in Join.java + configs.addFails(mainType, lookupType); + configs.addFails(lookupType, mainType); + } + } + } + + // Tests for mixed-date/time types + var dateTypes = List.of(DATETIME, DATE_NANOS); + { + TestConfigs configs = testConfigurations.computeIfAbsent("mixed-temporal", TestConfigs::new); + for (DataType mainType : dateTypes) { + for (DataType lookupType : dateTypes) { + if (mainType != lookupType) { + configs.addFails(mainType, lookupType); + } + } + } + } + + // Tests for all unsupported types + DataType[] unsupported = Join.UNSUPPORTED_TYPES; + { + Collection existing = testConfigurations.values(); + TestConfigs configs = testConfigurations.computeIfAbsent("unsupported", TestConfigs::new); + for (DataType type : unsupported) { + if (type == NULL + || type == DOC_DATA_TYPE + || type == TSID_DATA_TYPE + || type == AGGREGATE_METRIC_DOUBLE + || type.esType() == null + || type.isCounter() + || DataType.isRepresentable(type) == false) { + // Skip unmappable types, or types not supported in ES|QL in general + continue; + } + if (existingIndex(existing, type, type)) { + // Skip existing configurations + continue; + } + configs.addFailsUnsupported(type, type); + } + } + + // Tests for all types where left and right are the same type + DataType[] supported = { + BOOLEAN, + LONG, + INTEGER, + DOUBLE, + SHORT, + BYTE, + FLOAT, + HALF_FLOAT, + DATETIME, + DATE_NANOS, + IP, + KEYWORD, + SCALED_FLOAT }; + { + Collection existing = testConfigurations.values(); + TestConfigs configs = testConfigurations.computeIfAbsent("same", TestConfigs::new); + for (DataType type : supported) { + assertThat("Claiming supported for unsupported type: " + type, 
List.of(unsupported).contains(type), is(false)); + if (existingIndex(existing, type, type) == false) { + // Only add the configuration if it doesn't already exist + configs.addPasses(type, type); + } + } + } + + // Assert that unsupported types are not in the supported list + for (DataType type : unsupported) { + assertThat("Claiming supported for unsupported type: " + type, List.of(supported).contains(type), is(false)); + } + + // Assert that unsupported+supported covers all types: + List missing = new ArrayList<>(); + for (DataType type : DataType.values()) { + boolean isUnsupported = List.of(unsupported).contains(type); + boolean isSupported = List.of(supported).contains(type); + if (isUnsupported == false && isSupported == false && type != SEMANTIC_TEXT) { + missing.add(type); + } + } + assertThat(missing + " are not in the supported or unsupported list", missing.size(), is(0)); + + // Tests for all other type combinations + { + Collection existing = testConfigurations.values(); + TestConfigs configs = testConfigurations.computeIfAbsent("others", TestConfigs::new); + for (DataType mainType : supported) { + for (DataType lookupType : supported) { + if (existingIndex(existing, mainType, lookupType) == false) { + // Only add the configuration if it doesn't already exist + configs.addFails(mainType, lookupType); + } + } + } + } + + // Make sure we have never added two configurations with the same index name + Set knownTypes = new HashSet<>(); + for (TestConfigs configs : testConfigurations.values()) { + for (TestConfig config : configs.configs.values()) { + if (knownTypes.contains(config.indexName())) { + throw new IllegalArgumentException("Duplicate index name: " + config.indexName()); + } + knownTypes.add(config.indexName()); + } + } + } + + private static boolean existingIndex(Collection existing, DataType mainType, DataType lookupType) { + String indexName = "index_" + mainType.esType() + "_" + lookupType.esType(); + return existing.stream().anyMatch(c -> c.exists(indexName)); + } + + public void testLookupJoinStrings() { + testLookupJoinTypes("strings"); + } + + public void testLookupJoinIntegers() { + testLookupJoinTypes("integers"); + } + + public void testLookupJoinFloats() { + testLookupJoinTypes("floats"); + } + + public void testLookupJoinMixedNumerical() { + testLookupJoinTypes("mixed-numerical"); + } + + public void testLookupJoinMixedTemporal() { + testLookupJoinTypes("mixed-temporal"); + } + + public void testLookupJoinSame() { + testLookupJoinTypes("same"); + } + + public void testLookupJoinUnsupported() { + testLookupJoinTypes("unsupported"); + } + + public void testLookupJoinOthers() { + testLookupJoinTypes("others"); + } + + private void testLookupJoinTypes(String group) { + initIndexes(group); + initData(group); + for (TestConfig config : testConfigurations.get(group).configs.values()) { + String query = String.format( + Locale.ROOT, + "FROM index | LOOKUP JOIN %s ON %s | KEEP other", + config.indexName(), + config.fieldName() + ); + config.validateMainIndex(); + config.validateLookupIndex(); + config.testQuery(query); + } + } + + private void initIndexes(String group) { + Collection configs = testConfigurations.get(group).configs.values(); + String propertyPrefix = "{\n \"properties\" : {\n"; + String propertySuffix = " }\n}\n"; + // The main index will have many fields, one of each type to use in later type specific joins + String mainFields = propertyPrefix + configs.stream() + .map(TestConfig::mainPropertySpec) + .distinct() + .collect(Collectors.joining(",\n ")) + 
propertySuffix; + assertAcked(prepareCreate("index").setMapping(mainFields)); + + Settings.Builder settings = Settings.builder() + .put("index.number_of_shards", 1) + .put("index.number_of_replicas", 0) + .put("index.mode", "lookup"); + configs.forEach( + // Each lookup index will get a document with a field to join on, and a results field to get back + (c) -> assertAcked( + prepareCreate(c.indexName()).setSettings(settings.build()) + .setMapping(propertyPrefix + c.lookupPropertySpec() + propertySuffix) + ) + ); + } + + private void initData(String group) { + Collection configs = testConfigurations.get(group).configs.values(); + int docId = 0; + for (TestConfig config : configs) { + String doc = String.format(Locale.ROOT, """ + { + %s, + "other": "value" + } + """, lookupPropertyFor(config)); + index(config.indexName(), "" + (++docId), doc); + refresh(config.indexName()); + } + List mainProperties = configs.stream().map(this::mainPropertyFor).distinct().collect(Collectors.toList()); + index("index", "1", String.format(Locale.ROOT, """ + { + %s + } + """, String.join(",\n ", mainProperties))); + refresh("index"); + } + + private String lookupPropertyFor(TestConfig config) { + return String.format(Locale.ROOT, "\"%s\": %s", config.fieldName(), sampleDataTextFor(config.lookupType())); + } + + private String mainPropertyFor(TestConfig config) { + return String.format(Locale.ROOT, "\"%s\": %s", config.fieldName(), sampleDataTextFor(config.mainType())); + } + + private static String sampleDataTextFor(DataType type) { + var value = sampleDataFor(type); + if (value instanceof String) { + return "\"" + value + "\""; + } + return String.valueOf(value); + } + + private static final double SCALING_FACTOR = 10.0; + + private static Object sampleDataFor(DataType type) { + return switch (type) { + case BOOLEAN -> true; + case DATETIME, DATE_NANOS -> "2025-04-02T12:00:00.000Z"; + case IP -> "127.0.0.1"; + case KEYWORD, TEXT -> "key"; + case BYTE, SHORT, INTEGER -> 1; + case LONG, UNSIGNED_LONG -> 1L; + case HALF_FLOAT, FLOAT, DOUBLE, SCALED_FLOAT -> 1.0; + case VERSION -> "1.2.19"; + case GEO_POINT, CARTESIAN_POINT -> "POINT (1.0 2.0)"; + case GEO_SHAPE, CARTESIAN_SHAPE -> "POLYGON ((0.0 0.0, 1.0 0.0, 1.0 1.0, 0.0 1.0, 0.0 0.0))"; + default -> throw new IllegalArgumentException("Unsupported type: " + type); + }; + } + + private static class TestConfigs { + final String group; + final Map configs; + + TestConfigs(String group) { + this.group = group; + this.configs = new LinkedHashMap<>(); + } + + private boolean exists(String indexName) { + return configs.containsKey(indexName); + } + + private void add(TestConfig config) { + if (configs.containsKey(config.indexName())) { + throw new IllegalArgumentException("Duplicate index name: " + config.indexName()); + } + configs.put(config.indexName(), config); + } + + private void addPasses(DataType mainType, DataType lookupType) { + add(new TestConfigPasses(mainType, lookupType, true)); + } + + private void addFails(DataType mainType, DataType lookupType) { + String fieldName = "field_" + mainType.esType(); + String errorMessage = String.format( + Locale.ROOT, + "JOIN left field [%s] of type [%s] is incompatible with right field [%s] of type [%s]", + fieldName, + mainType.widenSmallNumeric(), + fieldName, + lookupType.widenSmallNumeric() + ); + add( + new TestConfigFails<>( + mainType, + lookupType, + VerificationException.class, + e -> assertThat(e.getMessage(), containsString(errorMessage)) + ) + ); + } + + private void addFailsUnsupported(DataType mainType, 
DataType lookupType) { + String fieldName = "field_" + mainType.esType(); + String errorMessage = String.format( + Locale.ROOT, + "JOIN with right field [%s] of type [%s] is not supported", + fieldName, + lookupType + ); + add( + new TestConfigFails<>( + mainType, + lookupType, + VerificationException.class, + e -> assertThat(e.getMessage(), containsString(errorMessage)) + ) + ); + } + } + + interface TestConfig { + DataType mainType(); + + DataType lookupType(); + + default String indexName() { + return "index_" + mainType().esType() + "_" + lookupType().esType(); + } + + default String fieldName() { + return "field_" + mainType().esType(); + } + + default String mainPropertySpec() { + return propertySpecFor(fieldName(), mainType(), ""); + } + + default String lookupPropertySpec() { + return propertySpecFor(fieldName(), lookupType(), ", \"other\": { \"type\" : \"keyword\" }"); + } + + /** Make sure the left index has the expected fields and types */ + default void validateMainIndex() { + validateIndex("index", fieldName(), sampleDataFor(mainType())); + } + + /** Make sure the lookup index has the expected fields and types */ + default void validateLookupIndex() { + validateIndex(indexName(), fieldName(), sampleDataFor(lookupType())); + } + + void testQuery(String query); + } + + private static String propertySpecFor(String fieldName, DataType type, String extra) { + if (type == SCALED_FLOAT) { + return String.format( + Locale.ROOT, + "\"%s\": { \"type\" : \"%s\", \"scaling_factor\": %f }", + fieldName, + type.esType(), + SCALING_FACTOR + ) + extra; + } + return String.format(Locale.ROOT, "\"%s\": { \"type\" : \"%s\" }", fieldName, type.esType().replaceAll("cartesian_", "")) + extra; + } + + private static void validateIndex(String indexName, String fieldName, Object expectedValue) { + String query = String.format(Locale.ROOT, "FROM %s | KEEP %s", indexName, fieldName); + try (var response = EsqlQueryRequestBuilder.newRequestBuilder(client()).query(query).get()) { + ColumnInfo info = response.response().columns().get(0); + assertThat("Expected index '" + indexName + "' to have column '" + fieldName + ": " + query, info.name(), is(fieldName)); + Iterator results = response.response().column(0).iterator(); + assertTrue("Expected at least one result for query: " + query, results.hasNext()); + Object indexedResult = response.response().column(0).iterator().next(); + assertThat("Expected valid result: " + query, indexedResult, is(expectedValue)); + } + } + + private record TestConfigPasses(DataType mainType, DataType lookupType, boolean hasResults) implements TestConfig { + @Override + public void testQuery(String query) { + try (var response = EsqlQueryRequestBuilder.newRequestBuilder(client()).query(query).get()) { + Iterator results = response.response().column(0).iterator(); + assertTrue("Expected at least one result for query: " + query, results.hasNext()); + Object indexedResult = response.response().column(0).iterator().next(); + if (hasResults) { + assertThat("Expected valid result: " + query, indexedResult, equalTo("value")); + } else { + assertThat("Expected empty results for query: " + query, indexedResult, is(nullValue())); + } + } + } + } + + private record TestConfigFails(DataType mainType, DataType lookupType, Class exception, Consumer assertion) + implements + TestConfig { + @Override + public void testQuery(String query) { + E e = expectThrows( + exception(), + "Expected exception " + exception().getSimpleName() + " but no exception was thrown: " + query, + () -> { + // 
noinspection EmptyTryBlock + try (var ignored = EsqlQueryRequestBuilder.newRequestBuilder(client()).query(query).get()) { + // We use try-with-resources to ensure the request is closed if the exception is not thrown (less cluttered errors) + } + } + ); + assertion().accept(e); + } + } +} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index 80509b7519bff..abe66e7308225 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -418,6 +418,12 @@ public enum Cap { * e.g. {@code WHERE millis > to_datenanos("2023-10-23T12:15:03.360103847") AND millis < to_datetime("2023-10-23T13:53:55.832")} */ FIX_DATE_NANOS_MIXED_RANGE_PUSHDOWN_BUG(), + + /** + * Support for date nanos in lookup join. Done in #127962 + */ + DATE_NANOS_LOOKUP_JOIN, + /** * DATE_PARSE supports reading timezones */ diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/enrich/AbstractLookupService.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/enrich/AbstractLookupService.java index b5caa42577311..fa6ab872e2d90 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/enrich/AbstractLookupService.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/enrich/AbstractLookupService.java @@ -201,6 +201,7 @@ protected static QueryList termQueryList( return switch (inputDataType) { case IP -> QueryList.ipTermQueryList(field, searchExecutionContext, (BytesRefBlock) block); case DATETIME -> QueryList.dateTermQueryList(field, searchExecutionContext, (LongBlock) block); + case DATE_NANOS -> QueryList.dateNanosTermQueryList(field, searchExecutionContext, (LongBlock) block); default -> QueryList.rawTermQueryList(field, searchExecutionContext, block); }; } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/join/Join.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/join/Join.java index 14877abb62272..e9fb082ec0b1c 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/join/Join.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/join/Join.java @@ -18,6 +18,7 @@ import org.elasticsearch.xpack.esql.core.expression.ReferenceAttribute; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.core.type.DataType; import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput; import org.elasticsearch.xpack.esql.plan.logical.BinaryPlan; import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; @@ -25,16 +26,57 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import java.util.Objects; import static org.elasticsearch.xpack.esql.common.Failure.fail; +import static org.elasticsearch.xpack.esql.core.type.DataType.AGGREGATE_METRIC_DOUBLE; +import static org.elasticsearch.xpack.esql.core.type.DataType.CARTESIAN_POINT; +import static org.elasticsearch.xpack.esql.core.type.DataType.CARTESIAN_SHAPE; +import static org.elasticsearch.xpack.esql.core.type.DataType.COUNTER_DOUBLE; +import static org.elasticsearch.xpack.esql.core.type.DataType.COUNTER_INTEGER; +import static 
org.elasticsearch.xpack.esql.core.type.DataType.COUNTER_LONG; +import static org.elasticsearch.xpack.esql.core.type.DataType.DATE_PERIOD; +import static org.elasticsearch.xpack.esql.core.type.DataType.DOC_DATA_TYPE; +import static org.elasticsearch.xpack.esql.core.type.DataType.GEO_POINT; +import static org.elasticsearch.xpack.esql.core.type.DataType.GEO_SHAPE; +import static org.elasticsearch.xpack.esql.core.type.DataType.NULL; +import static org.elasticsearch.xpack.esql.core.type.DataType.OBJECT; +import static org.elasticsearch.xpack.esql.core.type.DataType.PARTIAL_AGG; +import static org.elasticsearch.xpack.esql.core.type.DataType.SOURCE; import static org.elasticsearch.xpack.esql.core.type.DataType.TEXT; +import static org.elasticsearch.xpack.esql.core.type.DataType.TIME_DURATION; +import static org.elasticsearch.xpack.esql.core.type.DataType.TSID_DATA_TYPE; +import static org.elasticsearch.xpack.esql.core.type.DataType.UNSIGNED_LONG; +import static org.elasticsearch.xpack.esql.core.type.DataType.UNSUPPORTED; +import static org.elasticsearch.xpack.esql.core.type.DataType.VERSION; import static org.elasticsearch.xpack.esql.expression.NamedExpressions.mergeOutputAttributes; import static org.elasticsearch.xpack.esql.plan.logical.join.JoinTypes.LEFT; public class Join extends BinaryPlan implements PostAnalysisVerificationAware, SortAgnostic { public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(LogicalPlan.class, "Join", Join::new); + public static final DataType[] UNSUPPORTED_TYPES = { + TEXT, + VERSION, + UNSIGNED_LONG, + GEO_POINT, + GEO_SHAPE, + CARTESIAN_POINT, + CARTESIAN_SHAPE, + UNSUPPORTED, + NULL, + COUNTER_LONG, + COUNTER_INTEGER, + COUNTER_DOUBLE, + OBJECT, + SOURCE, + DATE_PERIOD, + TIME_DURATION, + DOC_DATA_TYPE, + TSID_DATA_TYPE, + PARTIAL_AGG, + AGGREGATE_METRIC_DOUBLE }; private final JoinConfig config; private List lazyOutput; @@ -213,7 +255,7 @@ public void postAnalysisVerification(Failures failures) { for (int i = 0; i < config.leftFields().size(); i++) { Attribute leftField = config.leftFields().get(i); Attribute rightField = config.rightFields().get(i); - if (leftField.dataType().noText() != rightField.dataType().noText()) { + if (comparableTypes(leftField, rightField) == false) { failures.add( fail( leftField, @@ -225,11 +267,18 @@ public void postAnalysisVerification(Failures failures) { ) ); } - if (rightField.dataType().equals(TEXT)) { + // TODO: Add support for VERSION by implementing QueryList.versionTermQueryList similar to ipTermQueryList + if (Arrays.stream(UNSUPPORTED_TYPES).anyMatch(t -> rightField.dataType().equals(t))) { failures.add( fail(leftField, "JOIN with right field [{}] of type [{}] is not supported", rightField.name(), rightField.dataType()) ); } } } + + private static boolean comparableTypes(Attribute left, Attribute right) { + // TODO: Consider allowing more valid types + // return left.dataType().noText() == right.dataType().noText() || left.dataType().isNumeric() == right.dataType().isNumeric(); + return left.dataType().noText() == right.dataType().noText(); + } }
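
As a companion to the changes above, here is a minimal, self-contained sketch (not part of the patch; the class and method names are illustrative) of the query shapes the new date_nanos lookup path produces with plain Lucene: equality is a degenerate [value, value] range that combines the point index with a doc-values fallback, and a multi-valued join key becomes a constant-scoring OR of such clauses with duplicate values skipped. This mirrors the indexed-with-doc-values branch of the new DateFieldType.rangeQuery and the multi-value branch of DateNanosQueryList.doGetQuery.

import java.util.HashSet;
import java.util.Set;

import org.apache.lucene.document.LongPoint;
import org.apache.lucene.document.SortedNumericDocValuesField;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.IndexOrDocValuesQuery;
import org.apache.lucene.search.Query;

public final class DateNanosLookupQuerySketch {
    private DateNanosLookupQuerySketch() {}

    /** Equality expressed as a [value, value] range: points query backed by a doc-values fallback. */
    public static Query equalityQuery(String field, long nanos) {
        Query points = LongPoint.newRangeQuery(field, nanos, nanos);
        Query docValues = SortedNumericDocValuesField.newSlowRangeQuery(field, nanos, nanos);
        return new IndexOrDocValuesQuery(points, docValues);
    }

    /** Multi-valued join key: OR of per-value equality queries, duplicates skipped, scores discarded. */
    public static Query multiValueQuery(String field, long[] nanosValues) {
        Set<Long> seen = new HashSet<>();
        BooleanQuery.Builder builder = new BooleanQuery.Builder();
        for (long nanos : nanosValues) {
            if (seen.add(nanos)) { // add() returns false for a duplicate value
                builder.add(equalityQuery(field, nanos), BooleanClause.Occur.SHOULD);
            }
        }
        return new ConstantScoreQuery(builder.build());
    }

    public static void main(String[] args) {
        // 2023-10-23T13:55:01.543123456Z as nanoseconds since the epoch.
        long nanos = 1_698_069_301_543_123_456L;
        System.out.println(equalityQuery("@timestamp", nanos));
        System.out.println(multiValueQuery("@timestamp", new long[] { nanos, nanos, nanos + 1 }));
    }
}

Wrapping both clauses in IndexOrDocValuesQuery lets Lucene decide, per segment, whether the points-based range or the doc-values scan is cheaper to execute, which is why the field type builds both whenever doc values are available.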