Skip to content

Commit 9452a85

Browse files
authored
ESQL: Support union types/multi-index CSV tests (#119273)
ESQL: adds support for multi-index/union types in CSV tests. Previously, these were only testable using integration tests, which are slower and more annoying to debug.
1 parent e87020e commit 9452a85

File tree

7 files changed

+339
-202
lines changed

7 files changed

+339
-202
lines changed

x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestUtils.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -551,11 +551,11 @@ public static Type asType(ElementType elementType, Type actualType) {
551551
}
552552

553553
private static Type bytesRefBlockType(Type actualType) {
554-
if (actualType == GEO_POINT || actualType == CARTESIAN_POINT || actualType == GEO_SHAPE || actualType == CARTESIAN_SHAPE) {
555-
return actualType;
556-
} else {
557-
return KEYWORD;
558-
}
554+
return switch (actualType) {
555+
case NULL -> NULL;
556+
case GEO_POINT, CARTESIAN_POINT, GEO_SHAPE, CARTESIAN_SHAPE -> actualType;
557+
default -> KEYWORD;
558+
};
559559
}
560560

561561
Object convert(String value) {

x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
import org.elasticsearch.common.logging.LogConfigurator;
2727
import org.elasticsearch.common.settings.Settings;
2828
import org.elasticsearch.common.xcontent.XContentHelper;
29+
import org.elasticsearch.core.Nullable;
2930
import org.elasticsearch.logging.LogManager;
3031
import org.elasticsearch.logging.Logger;
3132
import org.elasticsearch.test.rest.ESRestTestCase;
@@ -603,13 +604,20 @@ private static void forceMerge(RestClient client, Set<String> indices, Logger lo
603604
}
604605
}
605606

607+
public record MultiIndexTestDataset(String indexPattern, List<TestsDataset> datasets) {
608+
public static MultiIndexTestDataset of(TestsDataset testsDataset) {
609+
return new MultiIndexTestDataset(testsDataset.indexName, List.of(testsDataset));
610+
}
611+
612+
}
613+
606614
public record TestsDataset(
607615
String indexName,
608616
String mappingFileName,
609617
String dataFileName,
610618
String settingFileName,
611619
boolean allowSubFields,
612-
Map<String, String> typeMapping,
620+
@Nullable Map<String, String> typeMapping,
613621
boolean requiresInferenceEndpoint
614622
) {
615623
public TestsDataset(String indexName, String mappingFileName, String dataFileName) {

x-pack/plugin/esql/qa/testFixtures/src/main/resources/union_types.csv-spec

Lines changed: 2 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -445,6 +445,7 @@ count:long | message:keyword
445445

446446
multiIndexMissingIpToString
447447
required_capability: union_types
448+
required_capability: metadata_fields
448449
required_capability: union_types_missing_field
449450

450451
FROM sample_data, sample_data_str, missing_ip_sample_data METADATA _index
@@ -479,6 +480,7 @@ sample_data_str | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450
479480

480481
multiIndexMissingIpToIp
481482
required_capability: union_types
483+
required_capability: metadata_fields
482484
required_capability: union_types_missing_field
483485

484486
FROM sample_data, sample_data_str, missing_ip_sample_data METADATA _index
@@ -1373,9 +1375,6 @@ client_ip:ip | event_duration:long | message:keyword | @timestamp:keywo
13731375
# Once INLINESTATS supports expressions in agg functions and groups, convert the group in the inlinestats
13741376

13751377
multiIndexIndirectUseOfUnionTypesInSort
1376-
// TODO: `union_types` is required only because this makes the test skip in the csv tests; better solution:
1377-
// make the csv tests work with multiple indices.
1378-
required_capability: union_types
13791378
FROM sample_data, sample_data_ts_long
13801379
| SORT client_ip ASC
13811380
| LIMIT 1
@@ -1386,8 +1385,6 @@ FROM sample_data, sample_data_ts_long
13861385
;
13871386

13881387
multiIndexIndirectUseOfUnionTypesInEval
1389-
// TODO: `union_types` is required only because this makes the test skip in the csv tests; better solution:
1390-
// make the csv tests work with multiple indices.
13911388
required_capability: union_types
13921389
FROM sample_data, sample_data_ts_long
13931390
| EVAL foo = event_duration > 1232381
@@ -1400,9 +1397,6 @@ FROM sample_data, sample_data_ts_long
14001397
;
14011398

14021399
multiIndexIndirectUseOfUnionTypesInRename
1403-
// TODO: `union_types` is required only because this makes the test skip in the csv tests; better solution:
1404-
// make the csv tests work with multiple indices.
1405-
required_capability: union_types
14061400
required_capability: union_types_fix_rename_resolution
14071401
FROM sample_data, sample_data_ts_long
14081402
| RENAME message AS event_message
@@ -1415,9 +1409,6 @@ FROM sample_data, sample_data_ts_long
14151409
;
14161410

14171411
multiIndexIndirectUseOfUnionTypesInKeep
1418-
// TODO: `union_types` is required only because this makes the test skip in the csv tests; better solution:
1419-
// make the csv tests work with multiple indices.
1420-
required_capability: union_types
14211412
FROM sample_data, sample_data_ts_long
14221413
| KEEP client_ip, event_duration, message
14231414
| SORT client_ip ASC
@@ -1429,9 +1420,6 @@ client_ip:ip | event_duration:long | message:keyword
14291420
;
14301421

14311422
multiIndexIndirectUseOfUnionTypesInWildcardKeep
1432-
// TODO: `union_types` is required only because this makes the test skip in the csv tests; better solution:
1433-
// make the csv tests work with multiple indices.
1434-
required_capability: union_types
14351423
required_capability: union_types_fix_rename_resolution
14361424
FROM sample_data, sample_data_ts_long
14371425
| KEEP *
@@ -1444,9 +1432,6 @@ FROM sample_data, sample_data_ts_long
14441432
;
14451433

14461434
multiIndexIndirectUseOfUnionTypesInWildcardKeep2
1447-
// TODO: `union_types` is required only because this makes the test skip in the csv tests; better solution:
1448-
// make the csv tests work with multiple indices.
1449-
required_capability: union_types
14501435
required_capability: union_types_fix_rename_resolution
14511436
FROM sample_data, sample_data_ts_long
14521437
| KEEP *e*
@@ -1460,9 +1445,6 @@ FROM sample_data, sample_data_ts_long
14601445

14611446

14621447
multiIndexUseOfUnionTypesInKeep
1463-
// TODO: `union_types` is required only because this makes the test skip in the csv tests; better solution:
1464-
// make the csv tests work with multiple indices.
1465-
required_capability: union_types
14661448
required_capability: union_types_fix_rename_resolution
14671449
FROM sample_data, sample_data_ts_long
14681450
| KEEP @timestamp
@@ -1474,9 +1456,6 @@ null
14741456
;
14751457

14761458
multiIndexUseOfUnionTypesInDrop
1477-
// TODO: `union_types` is required only because this makes the test skip in the csv tests; better solution:
1478-
// make the csv tests work with multiple indices.
1479-
required_capability: union_types
14801459
required_capability: union_types_fix_rename_resolution
14811460
FROM sample_data, sample_data_ts_long
14821461
| DROP @timestamp
@@ -1489,9 +1468,6 @@ client_ip:ip | event_duration:long | message:keyword
14891468
;
14901469

14911470
multiIndexIndirectUseOfUnionTypesInWildcardDrop
1492-
// TODO: `union_types` is required only because this makes the test skip in the csv tests; better solution:
1493-
// make the csv tests work with multiple indices.
1494-
required_capability: union_types
14951471
required_capability: union_types_fix_rename_resolution
14961472
FROM sample_data, sample_data_ts_long
14971473
| DROP *time*
@@ -1504,9 +1480,6 @@ client_ip:ip | event_duration:long | message:keyword
15041480
;
15051481

15061482
multiIndexIndirectUseOfUnionTypesInWhere
1507-
// TODO: `union_types` is required only because this makes the test skip in the csv tests; better solution:
1508-
// make the csv tests work with multiple indices.
1509-
required_capability: union_types
15101483
FROM sample_data, sample_data_ts_long
15111484
| WHERE message == "Disconnected"
15121485
;
@@ -1517,9 +1490,6 @@ FROM sample_data, sample_data_ts_long
15171490
;
15181491

15191492
multiIndexIndirectUseOfUnionTypesInDissect
1520-
// TODO: `union_types` is required only because this makes the test skip in the csv tests; better solution:
1521-
// make the csv tests work with multiple indices.
1522-
required_capability: union_types
15231493
FROM sample_data, sample_data_ts_long
15241494
| DISSECT message "%{foo}"
15251495
| SORT client_ip ASC
@@ -1531,9 +1501,6 @@ FROM sample_data, sample_data_ts_long
15311501
;
15321502

15331503
multiIndexIndirectUseOfUnionTypesInGrok
1534-
// TODO: `union_types` is required only because this makes the test skip in the csv tests; better solution:
1535-
// make the csv tests work with multiple indices.
1536-
required_capability: union_types
15371504
FROM sample_data, sample_data_ts_long
15381505
| GROK message "%{WORD:foo}"
15391506
| SORT client_ip ASC
@@ -1545,9 +1512,6 @@ FROM sample_data, sample_data_ts_long
15451512
;
15461513

15471514
multiIndexIndirectUseOfUnionTypesInEnrich
1548-
// TODO: `union_types` is required only because this makes the test skip in the csv tests; better solution:
1549-
// make the csv tests work with multiple indices.
1550-
required_capability: union_types
15511515
required_capability: enrich_load
15521516
FROM sample_data, sample_data_ts_long
15531517
| EVAL client_ip = client_ip::keyword
@@ -1561,9 +1525,6 @@ FROM sample_data, sample_data_ts_long
15611525
;
15621526

15631527
multiIndexIndirectUseOfUnionTypesInStats
1564-
// TODO: `union_types` is required only because this makes the test skip in the csv tests; better solution:
1565-
// make the csv tests work with multiple indices.
1566-
required_capability: union_types
15671528
FROM sample_data, sample_data_ts_long
15681529
| STATS foo = max(event_duration) BY client_ip
15691530
| SORT client_ip ASC
@@ -1577,9 +1538,6 @@ foo:long | client_ip:ip
15771538
;
15781539

15791540
multiIndexIndirectUseOfUnionTypesInInlineStats-Ignore
1580-
// TODO: `union_types` is required only because this makes the test skip in the csv tests; better solution:
1581-
// make the csv tests work with multiple indices.
1582-
required_capability: union_types
15831541
required_capability: inlinestats
15841542
FROM sample_data, sample_data_ts_long
15851543
| INLINESTATS foo = max(event_duration)
@@ -1592,9 +1550,6 @@ FROM sample_data, sample_data_ts_long
15921550
;
15931551

15941552
multiIndexIndirectUseOfUnionTypesInLookup-Ignore
1595-
// TODO: `union_types` is required only because this makes the test skip in the csv tests; better solution:
1596-
// make the csv tests work with multiple indices.
1597-
required_capability: union_types
15981553
required_capability: lookup_v4
15991554
FROM sample_data, sample_data_ts_long
16001555
| SORT client_ip ASC
@@ -1608,9 +1563,6 @@ FROM sample_data, sample_data_ts_long
16081563
;
16091564

16101565
multiIndexIndirectUseOfUnionTypesInMvExpand
1611-
// TODO: `union_types` is required only because this makes the test skip in the csv tests; better solution:
1612-
// make the csv tests work with multiple indices.
1613-
required_capability: union_types
16141566
FROM sample_data, sample_data_ts_long
16151567
| EVAL foo = MV_APPEND(message, message)
16161568
| SORT client_ip ASC

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java

Lines changed: 7 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -13,22 +13,16 @@
1313
import org.apache.lucene.search.BooleanQuery;
1414
import org.apache.lucene.search.IndexSearcher;
1515
import org.apache.lucene.search.Query;
16-
import org.elasticsearch.common.breaker.CircuitBreaker;
17-
import org.elasticsearch.common.breaker.NoopCircuitBreaker;
1816
import org.elasticsearch.common.logging.HeaderWarning;
19-
import org.elasticsearch.common.util.BigArrays;
2017
import org.elasticsearch.compute.aggregation.GroupingAggregator;
2118
import org.elasticsearch.compute.data.Block;
2219
import org.elasticsearch.compute.data.ElementType;
23-
import org.elasticsearch.compute.data.Page;
2420
import org.elasticsearch.compute.lucene.LuceneCountOperator;
2521
import org.elasticsearch.compute.lucene.LuceneOperator;
2622
import org.elasticsearch.compute.lucene.LuceneSourceOperator;
2723
import org.elasticsearch.compute.lucene.LuceneTopNSourceOperator;
2824
import org.elasticsearch.compute.lucene.TimeSeriesSortedSourceOperatorFactory;
2925
import org.elasticsearch.compute.lucene.ValuesSourceReaderOperator;
30-
import org.elasticsearch.compute.operator.DriverContext;
31-
import org.elasticsearch.compute.operator.EvalOperator;
3226
import org.elasticsearch.compute.operator.Operator;
3327
import org.elasticsearch.compute.operator.OrdinalsGroupingOperator;
3428
import org.elasticsearch.compute.operator.SourceOperator;
@@ -380,29 +374,13 @@ public FieldNamesFieldMapper.FieldNamesFieldType fieldNames() {
380374
}
381375
}
382376

383-
static class TypeConvertingBlockLoader implements BlockLoader {
384-
protected final BlockLoader delegate;
385-
private final EvalOperator.ExpressionEvaluator convertEvaluator;
377+
private static class TypeConvertingBlockLoader implements BlockLoader {
378+
private final BlockLoader delegate;
379+
private final TypeConverter typeConverter;
386380

387381
protected TypeConvertingBlockLoader(BlockLoader delegate, AbstractConvertFunction convertFunction) {
388382
this.delegate = delegate;
389-
DriverContext driverContext1 = new DriverContext(
390-
BigArrays.NON_RECYCLING_INSTANCE,
391-
new org.elasticsearch.compute.data.BlockFactory(
392-
new NoopCircuitBreaker(CircuitBreaker.REQUEST),
393-
BigArrays.NON_RECYCLING_INSTANCE
394-
)
395-
);
396-
this.convertEvaluator = convertFunction.toEvaluator(e -> driverContext -> new EvalOperator.ExpressionEvaluator() {
397-
@Override
398-
public org.elasticsearch.compute.data.Block eval(Page page) {
399-
// This is a pass-through evaluator, since it sits directly on the source loading (no prior expressions)
400-
return page.getBlock(0);
401-
}
402-
403-
@Override
404-
public void close() {}
405-
}).get(driverContext1);
383+
this.typeConverter = TypeConverter.fromConvertFunction(convertFunction);
406384
}
407385

408386
@Override
@@ -413,8 +391,7 @@ public Builder builder(BlockFactory factory, int expectedCount) {
413391

414392
@Override
415393
public Block convert(Block block) {
416-
Page page = new Page((org.elasticsearch.compute.data.Block) block);
417-
return convertEvaluator.eval(page);
394+
return typeConverter.convert((org.elasticsearch.compute.data.Block) block);
418395
}
419396

420397
@Override
@@ -427,9 +404,7 @@ public ColumnAtATimeReader columnAtATimeReader(LeafReaderContext context) throws
427404
@Override
428405
public Block read(BlockFactory factory, Docs docs) throws IOException {
429406
Block block = reader.read(factory, docs);
430-
Page page = new Page((org.elasticsearch.compute.data.Block) block);
431-
org.elasticsearch.compute.data.Block converted = convertEvaluator.eval(page);
432-
return converted;
407+
return typeConverter.convert((org.elasticsearch.compute.data.Block) block);
433408
}
434409

435410
@Override
@@ -469,7 +444,7 @@ public SortedSetDocValues ordinals(LeafReaderContext context) {
469444

470445
@Override
471446
public final String toString() {
472-
return "TypeConvertingBlockLoader[delegate=" + delegate + ", convertEvaluator=" + convertEvaluator + "]";
447+
return "TypeConvertingBlockLoader[delegate=" + delegate + ", typeConverter=" + typeConverter + "]";
473448
}
474449
}
475450
}
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0; you may not use this file except in compliance with the Elastic License
5+
* 2.0.
6+
*/
7+
8+
package org.elasticsearch.xpack.esql.planner;
9+
10+
import org.elasticsearch.common.breaker.CircuitBreaker;
11+
import org.elasticsearch.common.breaker.NoopCircuitBreaker;
12+
import org.elasticsearch.common.util.BigArrays;
13+
import org.elasticsearch.compute.data.Block;
14+
import org.elasticsearch.compute.data.Page;
15+
import org.elasticsearch.compute.operator.DriverContext;
16+
import org.elasticsearch.compute.operator.EvalOperator.ExpressionEvaluator;
17+
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.AbstractConvertFunction;
18+
19+
class TypeConverter {
20+
private final String evaluatorName;
21+
private final ExpressionEvaluator convertEvaluator;
22+
23+
private TypeConverter(String evaluatorName, ExpressionEvaluator convertEvaluator) {
24+
this.evaluatorName = evaluatorName;
25+
this.convertEvaluator = convertEvaluator;
26+
}
27+
28+
public static TypeConverter fromConvertFunction(AbstractConvertFunction convertFunction) {
29+
DriverContext driverContext1 = new DriverContext(
30+
BigArrays.NON_RECYCLING_INSTANCE,
31+
new org.elasticsearch.compute.data.BlockFactory(
32+
new NoopCircuitBreaker(CircuitBreaker.REQUEST),
33+
BigArrays.NON_RECYCLING_INSTANCE
34+
)
35+
);
36+
return new TypeConverter(
37+
convertFunction.functionName(),
38+
convertFunction.toEvaluator(e -> driverContext -> new ExpressionEvaluator() {
39+
@Override
40+
public org.elasticsearch.compute.data.Block eval(Page page) {
41+
// This is a pass-through evaluator, since it sits directly on the source loading (no prior expressions)
42+
return page.getBlock(0);
43+
}
44+
45+
@Override
46+
public void close() {}
47+
}).get(driverContext1)
48+
);
49+
}
50+
51+
public Block convert(Block block) {
52+
return convertEvaluator.eval(new Page(block));
53+
}
54+
55+
@Override
56+
public String toString() {
57+
return evaluatorName;
58+
}
59+
}

0 commit comments

Comments
 (0)