Skip to content

Commit 561e50c

Browse files
authored
[8.x] ESQL: Speed type error testing (#119678) (#119762)
* ESQL: Speed type error testing (#119678) This shaves a few minutes off of the ESQL build: ``` 14m 50s -> 12m 38s ``` It does so by moving the type error testing from parameterized tests to a single, stand alone test per scalar that checks the errors for all unsupported types. It gets the list from the parameterized tests the same way as we were doing. But it's *fast*. AND, this will let us test a huge number of combinations without nearly as much overhead as we had before. In the worse case, unary math functions, this doesn't save any time. Maybe .1 second per function. For binary math functions it saves a *little* time. About a second per function. But for non-math, multivalued functions: wow. IpPrefix is ternary and it's test goes from 56.8 seconds to 2.6 seconds! Here are a few examples. | name | before | after | before| after | | -----------------: | -----: | -----------: | ----: | ----: | | Sin | 2.6s | 2.5s | 400 | 291 | | ATan2 | 17.4s | 16.1s | 8270 | 5961 | | IpPrefix | 56.8s | 🎉 2.6s | 40650 | 191 | | Equals | 69.9s | 50.6s | 30130 | 28131 | | NotEquals | 67.1s | 46.8s | 30100 | 28101 | | GreaterThan | 63.7s | 57.8s | 29940 | 27791 | | GreaterThanOrEqual | 61.1s | 61.6s | 29940 | 27791 | | LessThan | 63.7s | 61.3s | 29940 | 27791 | | LessThanOrEqual | 61.1s | 59.8s | 29940 | 27791 | | Case | 115.3s | 🎉 45.1s | 63756 | 13236 | | DateDiff | 3.4s | 4.0s?| 507 | 271 | | DateExtract | 12.1s | 3.4s | 3406 | 156 | | DateFormat | 8.1s | 2.4s | 2849 | 100 | | DateParse | 10.6s | 2.8s | 2992 | 276 | | DateTrunc | 10.9s | 3.4s | 3320 | 790 | | ByteLength | 5.7s | 4.0s | 520 | 391 | | EndsWith | 13.7s | 7.2s | 3880 | 1411 | | Hash | 30.7s | 17.4s | 3980 | 1511 | | LTrim | 27.1s | 29.0s?| 2840 | 2711| | Locate | 85.3s | 🎉 10.3s | 44310 | 1461 | | Replace | 96.5s | 🎉 10.1s | 42010 | 1711 | | RTrim | 15.6s | 20.0s?| 2840 | 2711 | | Split | 6.6s | 4.0s | 3360 | 397 | | StartsWith | 5.5s | 🎉 0.7s | 2800 | 330 | | Substring | 115.2s | 🎉 2.7s | 85386 | 483 | | Trim | 17.4s | 17.8s | 2840 | 2710 | Gradle Enterprise is also not happy with the raw *number* of tests ESQL runs. So lowering the overall number is important. See the table above. This strategy is *super* effective for that. It takes us ``` 769459 -> 470429 ``` * Compile plz
1 parent fc012fb commit 561e50c

File tree

73 files changed

+1673
-272
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

73 files changed

+1673
-272
lines changed

x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/AbstractFunctionTestCase.java

Lines changed: 9 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -191,7 +191,6 @@ protected static List<TestCaseSupplier> anyNullIsNull(
191191
ExpectedType expectedType,
192192
ExpectedEvaluatorToString evaluatorToString
193193
) {
194-
typesRequired(testCaseSuppliers);
195194
List<TestCaseSupplier> suppliers = new ArrayList<>(testCaseSuppliers.size());
196195
suppliers.addAll(testCaseSuppliers);
197196

@@ -274,7 +273,7 @@ protected static List<TestCaseSupplier> anyNullIsNull(
274273
}
275274

276275
@FunctionalInterface
277-
protected interface PositionalErrorMessageSupplier {
276+
public interface PositionalErrorMessageSupplier {
278277
/**
279278
* This interface defines functions to supply error messages for incorrect types in specific positions. Functions which have
280279
* the same type requirements for all positions can simplify this with a lambda returning a string constant.
@@ -291,7 +290,9 @@ protected interface PositionalErrorMessageSupplier {
291290
/**
292291
* Adds test cases containing unsupported parameter types that assert
293292
* that they throw type errors.
293+
* @deprecated make a subclass of {@link ErrorsForCasesWithoutExamplesTestCase} instead
294294
*/
295+
@Deprecated
295296
protected static List<TestCaseSupplier> errorsForCasesWithoutExamples(
296297
List<TestCaseSupplier> testCaseSuppliers,
297298
PositionalErrorMessageSupplier positionalErrorMessageSupplier
@@ -331,11 +332,14 @@ protected interface TypeErrorMessageSupplier {
331332
String apply(boolean includeOrdinal, List<Set<DataType>> validPerPosition, List<DataType> types);
332333
}
333334

335+
/**
336+
* @deprecated make a subclass of {@link ErrorsForCasesWithoutExamplesTestCase} instead
337+
*/
338+
@Deprecated
334339
protected static List<TestCaseSupplier> errorsForCasesWithoutExamples(
335340
List<TestCaseSupplier> testCaseSuppliers,
336341
TypeErrorMessageSupplier typeErrorMessageSupplier
337342
) {
338-
typesRequired(testCaseSuppliers);
339343
List<TestCaseSupplier> suppliers = new ArrayList<>(testCaseSuppliers.size());
340344
suppliers.addAll(testCaseSuppliers);
341345

@@ -346,7 +350,7 @@ protected static List<TestCaseSupplier> errorsForCasesWithoutExamples(
346350
.map(s -> s.types().size())
347351
.collect(Collectors.toSet())
348352
.stream()
349-
.flatMap(count -> allPermutations(count))
353+
.flatMap(AbstractFunctionTestCase::allPermutations)
350354
.filter(types -> valid.contains(types) == false)
351355
/*
352356
* Skip any cases with more than one null. Our tests don't generate
@@ -366,10 +370,6 @@ private static List<DataType> append(List<DataType> orig, DataType extra) {
366370
return longer;
367371
}
368372

369-
protected static Stream<DataType> representable() {
370-
return DataType.types().stream().filter(DataType::isRepresentable);
371-
}
372-
373373
protected static TestCaseSupplier typeErrorSupplier(
374374
boolean includeOrdinal,
375375
List<Set<DataType>> validPerPosition,
@@ -398,7 +398,7 @@ protected static TestCaseSupplier typeErrorSupplier(
398398
);
399399
}
400400

401-
private static List<Set<DataType>> validPerPosition(Set<List<DataType>> valid) {
401+
static List<Set<DataType>> validPerPosition(Set<List<DataType>> valid) {
402402
int max = valid.stream().mapToInt(List::size).max().getAsInt();
403403
List<Set<DataType>> result = new ArrayList<>(max);
404404
for (int i = 0; i < max; i++) {
@@ -1327,17 +1327,6 @@ public void allMemoryReleased() {
13271327
}
13281328
}
13291329

1330-
/**
1331-
* Validate that we know the types for all the test cases already created
1332-
* @param suppliers - list of suppliers before adding in the illegal type combinations
1333-
*/
1334-
protected static void typesRequired(List<TestCaseSupplier> suppliers) {
1335-
String bad = suppliers.stream().filter(s -> s.types() == null).map(s -> s.name()).collect(Collectors.joining("\n"));
1336-
if (bad.equals("") == false) {
1337-
throw new IllegalArgumentException("types required but not found for these tests:\n" + bad);
1338-
}
1339-
}
1340-
13411330
/**
13421331
* Returns true if the current test case is for an aggregation function.
13431332
* <p>

x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/AbstractScalarFunctionTestCase.java

Lines changed: 21 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,11 @@ public abstract class AbstractScalarFunctionTestCase extends AbstractFunctionTes
5858
* </p>
5959
*
6060
* @param entirelyNullPreservesType See {@link #anyNullIsNull(boolean, List)}
61+
* @deprecated use {@link #parameterSuppliersFromTypedDataWithDefaultChecksNoErrors}
62+
* and make a subclass of {@link ErrorsForCasesWithoutExamplesTestCase}.
63+
* It's a <strong>long</strong> faster.
6164
*/
65+
@Deprecated
6266
protected static Iterable<Object[]> parameterSuppliersFromTypedDataWithDefaultChecks(
6367
boolean entirelyNullPreservesType,
6468
List<TestCaseSupplier> suppliers,
@@ -72,6 +76,23 @@ protected static Iterable<Object[]> parameterSuppliersFromTypedDataWithDefaultCh
7276
);
7377
}
7478

79+
/**
80+
* Converts a list of test cases into a list of parameter suppliers.
81+
* Also, adds a default set of extra test cases.
82+
* <p>
83+
* Use if possible, as this method may get updated with new checks in the future.
84+
* </p>
85+
*
86+
* @param entirelyNullPreservesType See {@link #anyNullIsNull(boolean, List)}
87+
*/
88+
protected static Iterable<Object[]> parameterSuppliersFromTypedDataWithDefaultChecksNoErrors(
89+
// TODO remove after removing parameterSuppliersFromTypedDataWithDefaultChecks rename this to that.
90+
boolean entirelyNullPreservesType,
91+
List<TestCaseSupplier> suppliers
92+
) {
93+
return parameterSuppliersFromTypedData(anyNullIsNull(entirelyNullPreservesType, randomizeBytesRefsOffset(suppliers)));
94+
}
95+
7596
/**
7697
* Converts a list of test cases into a list of parameter suppliers.
7798
* Also, adds a default set of extra test cases.
@@ -364,43 +385,10 @@ public void testFold() {
364385
}
365386
}
366387

367-
public static String errorMessageStringForBinaryOperators(
368-
boolean includeOrdinal,
369-
List<Set<DataType>> validPerPosition,
370-
List<DataType> types,
371-
PositionalErrorMessageSupplier positionalErrorMessageSupplier
372-
) {
373-
try {
374-
return typeErrorMessage(includeOrdinal, validPerPosition, types, positionalErrorMessageSupplier);
375-
} catch (IllegalStateException e) {
376-
// This means all the positional args were okay, so the expected error is from the combination
377-
if (types.get(0).equals(DataType.UNSIGNED_LONG)) {
378-
return "first argument of [] is [unsigned_long] and second is ["
379-
+ types.get(1).typeName()
380-
+ "]. [unsigned_long] can only be operated on together with another [unsigned_long]";
381-
382-
}
383-
if (types.get(1).equals(DataType.UNSIGNED_LONG)) {
384-
return "first argument of [] is ["
385-
+ types.get(0).typeName()
386-
+ "] and second is [unsigned_long]. [unsigned_long] can only be operated on together with another [unsigned_long]";
387-
}
388-
return "first argument of [] is ["
389-
+ (types.get(0).isNumeric() ? "numeric" : types.get(0).typeName())
390-
+ "] so second argument must also be ["
391-
+ (types.get(0).isNumeric() ? "numeric" : types.get(0).typeName())
392-
+ "] but was ["
393-
+ types.get(1).typeName()
394-
+ "]";
395-
396-
}
397-
}
398-
399388
/**
400389
* Adds test cases containing unsupported parameter types that immediately fail.
401390
*/
402391
protected static List<TestCaseSupplier> failureForCasesWithoutExamples(List<TestCaseSupplier> testCaseSuppliers) {
403-
typesRequired(testCaseSuppliers);
404392
List<TestCaseSupplier> suppliers = new ArrayList<>(testCaseSuppliers.size());
405393
suppliers.addAll(testCaseSuppliers);
406394

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,167 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0; you may not use this file except in compliance with the Elastic License
5+
* 2.0.
6+
*/
7+
8+
package org.elasticsearch.xpack.esql.expression.function;
9+
10+
import org.elasticsearch.test.ESTestCase;
11+
import org.elasticsearch.xpack.esql.core.expression.Expression;
12+
import org.elasticsearch.xpack.esql.core.expression.TypeResolutions;
13+
import org.elasticsearch.xpack.esql.core.tree.Source;
14+
import org.elasticsearch.xpack.esql.core.type.DataType;
15+
import org.hamcrest.Matcher;
16+
17+
import java.util.ArrayList;
18+
import java.util.List;
19+
import java.util.Locale;
20+
import java.util.Set;
21+
import java.util.stream.Collectors;
22+
import java.util.stream.Stream;
23+
24+
import static org.elasticsearch.xpack.esql.EsqlTestUtils.randomLiteral;
25+
import static org.hamcrest.Matchers.greaterThan;
26+
27+
public abstract class ErrorsForCasesWithoutExamplesTestCase extends ESTestCase {
28+
protected abstract List<TestCaseSupplier> cases();
29+
30+
/**
31+
* Build the expression being tested, for the given source and list of arguments. Test classes need to implement this
32+
* to have something to test.
33+
*
34+
* @param source the source
35+
* @param args arg list from the test case, should match the length expected
36+
* @return an expression for evaluating the function being tested on the given arguments
37+
*/
38+
protected abstract Expression build(Source source, List<Expression> args);
39+
40+
protected abstract Matcher<String> expectedTypeErrorMatcher(List<Set<DataType>> validPerPosition, List<DataType> signature);
41+
42+
protected final List<TestCaseSupplier> paramsToSuppliers(Iterable<Object[]> cases) {
43+
List<TestCaseSupplier> result = new ArrayList<>();
44+
for (Object[] c : cases) {
45+
if (c.length != 1) {
46+
throw new IllegalArgumentException("weird layout for test cases");
47+
}
48+
TestCaseSupplier supplier = (TestCaseSupplier) c[0];
49+
result.add(supplier);
50+
}
51+
return result;
52+
}
53+
54+
public final void test() {
55+
int checked = 0;
56+
List<TestCaseSupplier> cases = cases();
57+
Set<List<DataType>> valid = cases.stream().map(TestCaseSupplier::types).collect(Collectors.toSet());
58+
List<Set<DataType>> validPerPosition = AbstractFunctionTestCase.validPerPosition(valid);
59+
Iterable<List<DataType>> missingSignatures = missingSignatures(cases, valid)::iterator;
60+
for (List<DataType> signature : missingSignatures) {
61+
logger.debug("checking {}", signature);
62+
List<Expression> args = new ArrayList<>(signature.size());
63+
for (DataType type : signature) {
64+
args.add(randomLiteral(type));
65+
}
66+
Expression expression = build(Source.synthetic(sourceForSignature(signature)), args);
67+
assertTrue("expected unresolved " + expression, expression.typeResolved().unresolved());
68+
assertThat(expression.typeResolved().message(), expectedTypeErrorMatcher(validPerPosition, signature));
69+
checked++;
70+
}
71+
logger.info("checked {} signatures", checked);
72+
assertThat("didn't check any signatures", checked, greaterThan(0));
73+
}
74+
75+
private Stream<List<DataType>> missingSignatures(List<TestCaseSupplier> cases, Set<List<DataType>> valid) {
76+
return cases.stream()
77+
.map(s -> s.types().size())
78+
.collect(Collectors.toSet())
79+
.stream()
80+
.flatMap(AbstractFunctionTestCase::allPermutations)
81+
.filter(types -> valid.contains(types) == false)
82+
/*
83+
* Skip any cases with more than one null. Our tests don't generate
84+
* the full combinatorial explosions of all nulls - just a single null.
85+
* Hopefully <null>, <null> cases will function the same as <null>, <valid>
86+
* cases.
87+
*/
88+
.filter(types -> types.stream().filter(t -> t == DataType.NULL).count() <= 1);
89+
}
90+
91+
protected static String sourceForSignature(List<DataType> signature) {
92+
StringBuilder source = new StringBuilder();
93+
for (DataType type : signature) {
94+
if (false == source.isEmpty()) {
95+
source.append(", ");
96+
}
97+
source.append(type.typeName());
98+
}
99+
return source.toString();
100+
}
101+
102+
/**
103+
* Build the expected error message for an invalid type signature.
104+
*/
105+
protected static String typeErrorMessage(
106+
boolean includeOrdinal,
107+
List<Set<DataType>> validPerPosition,
108+
List<DataType> signature,
109+
AbstractFunctionTestCase.PositionalErrorMessageSupplier expectedTypeSupplier
110+
) {
111+
int badArgPosition = -1;
112+
for (int i = 0; i < signature.size(); i++) {
113+
if (validPerPosition.get(i).contains(signature.get(i)) == false) {
114+
badArgPosition = i;
115+
break;
116+
}
117+
}
118+
if (badArgPosition == -1) {
119+
throw new IllegalStateException(
120+
"Can't generate error message for these types, you probably need a custom error message function"
121+
);
122+
}
123+
String ordinal = includeOrdinal ? TypeResolutions.ParamOrdinal.fromIndex(badArgPosition).name().toLowerCase(Locale.ROOT) + " " : "";
124+
String source = sourceForSignature(signature);
125+
String expectedTypeString = expectedTypeSupplier.apply(validPerPosition.get(badArgPosition), badArgPosition);
126+
String name = signature.get(badArgPosition).typeName();
127+
return ordinal + "argument of [" + source + "] must be [" + expectedTypeString + "], found value [] type [" + name + "]";
128+
}
129+
130+
protected static String errorMessageStringForBinaryOperators(
131+
List<Set<DataType>> validPerPosition,
132+
List<DataType> signature,
133+
AbstractFunctionTestCase.PositionalErrorMessageSupplier positionalErrorMessageSupplier
134+
) {
135+
try {
136+
return typeErrorMessage(true, validPerPosition, signature, positionalErrorMessageSupplier);
137+
} catch (IllegalStateException e) {
138+
String source = sourceForSignature(signature);
139+
// This means all the positional args were okay, so the expected error is from the combination
140+
if (signature.get(0).equals(DataType.UNSIGNED_LONG)) {
141+
return "first argument of ["
142+
+ source
143+
+ "] is [unsigned_long] and second is ["
144+
+ signature.get(1).typeName()
145+
+ "]. [unsigned_long] can only be operated on together with another [unsigned_long]";
146+
147+
}
148+
if (signature.get(1).equals(DataType.UNSIGNED_LONG)) {
149+
return "first argument of ["
150+
+ source
151+
+ "] is ["
152+
+ signature.get(0).typeName()
153+
+ "] and second is [unsigned_long]. [unsigned_long] can only be operated on together with another [unsigned_long]";
154+
}
155+
return "first argument of ["
156+
+ source
157+
+ "] is ["
158+
+ (signature.get(0).isNumeric() ? "numeric" : signature.get(0).typeName())
159+
+ "] so second argument must also be ["
160+
+ (signature.get(0).isNumeric() ? "numeric" : signature.get(0).typeName())
161+
+ "] but was ["
162+
+ signature.get(1).typeName()
163+
+ "]";
164+
165+
}
166+
}
167+
}
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0; you may not use this file except in compliance with the Elastic License
5+
* 2.0.
6+
*/
7+
8+
package org.elasticsearch.xpack.esql.expression.function.grouping;
9+
10+
import org.elasticsearch.xpack.esql.core.expression.Expression;
11+
import org.elasticsearch.xpack.esql.core.tree.Source;
12+
import org.elasticsearch.xpack.esql.core.type.DataType;
13+
import org.elasticsearch.xpack.esql.expression.function.ErrorsForCasesWithoutExamplesTestCase;
14+
import org.elasticsearch.xpack.esql.expression.function.TestCaseSupplier;
15+
import org.hamcrest.Matcher;
16+
17+
import java.util.List;
18+
import java.util.Set;
19+
20+
import static org.hamcrest.Matchers.equalTo;
21+
22+
public class CategorizeErrorTests extends ErrorsForCasesWithoutExamplesTestCase {
23+
@Override
24+
protected List<TestCaseSupplier> cases() {
25+
return paramsToSuppliers(CategorizeTests.parameters());
26+
}
27+
28+
@Override
29+
protected Expression build(Source source, List<Expression> args) {
30+
return new Categorize(source, args.get(0));
31+
}
32+
33+
@Override
34+
protected Matcher<String> expectedTypeErrorMatcher(List<Set<DataType>> validPerPosition, List<DataType> signature) {
35+
return equalTo(typeErrorMessage(false, validPerPosition, signature, (v, p) -> "string"));
36+
}
37+
}

x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/grouping/CategorizeTests.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ public static Iterable<Object[]> parameters() {
5151
)
5252
);
5353
}
54-
return parameterSuppliersFromTypedDataWithDefaultChecks(true, suppliers, (v, p) -> "string");
54+
return parameterSuppliersFromTypedDataWithDefaultChecksNoErrors(true, suppliers);
5555
}
5656

5757
@Override

0 commit comments

Comments
 (0)