Skip to content

Commit 8ee236e

Browse files
committed
Improve ES|QL CsvTests for random sampling by asserting numbers are within a range.
1 parent bee48f8 commit 8ee236e

File tree

3 files changed

+64
-59
lines changed

3 files changed

+64
-59
lines changed

x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvAssert.java

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -236,7 +236,7 @@ public static void assertData(
236236

237237
var transformedExpected = valueTransformer.apply(expectedType, expectedValue);
238238
var transformedActual = valueTransformer.apply(expectedType, actualValue);
239-
if (Objects.equals(transformedExpected, transformedActual) == false) {
239+
if (equals(transformedExpected, transformedActual) == false) {
240240
dataFailures.add(new DataFailure(row, column, transformedExpected, transformedActual));
241241
}
242242
if (dataFailures.size() > 10) {
@@ -275,6 +275,24 @@ public static void assertData(
275275
}
276276
}
277277

278+
private static boolean equals(Object expected, Object actual) {
279+
if (expected instanceof List<?> expectedList && actual instanceof List<?> actualList) {
280+
if (expectedList.size() != actualList.size()) {
281+
return false;
282+
}
283+
for (int i = 0; i < expectedList.size(); i++) {
284+
if (equals(expectedList.get(i), actualList.get(i)) == false) {
285+
return false;
286+
}
287+
}
288+
return true;
289+
} else if (expected instanceof CsvTestUtils.Range expectedRange) {
290+
return expectedRange.includes(actual);
291+
} else {
292+
return Objects.equals(expected, actual);
293+
}
294+
}
295+
278296
private static void dataFailure(
279297
String description,
280298
List<DataFailure> dataFailures,

x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestUtils.java

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,13 @@ public final class CsvTestUtils {
8282
public static final String COMMA_ESCAPING_REGEX = "(?<!\\" + ESCAPE_CHAR + "),";
8383
public static final String ESCAPED_COMMA_SEQUENCE = ESCAPE_CHAR + ",";
8484

85+
public record Range(Object lowerBound, Object upperBound) {
86+
@SuppressWarnings("unchecked")
87+
<T extends Comparable<T>> boolean includes(Object value) {
88+
return ((T) value).compareTo((T) lowerBound) >= 0 && ((T) value).compareTo((T) upperBound) <= 0;
89+
}
90+
}
91+
8592
private CsvTestUtils() {}
8693

8794
public static boolean isEnabled(String testName, String instructions, Version version) {
@@ -400,13 +407,13 @@ public static ExpectedResults loadCsvSpecValues(String csv) {
400407
// split on commas but ignoring escaped commas
401408
String[] multiValues = value.substring(1, value.length() - 1).split(COMMA_ESCAPING_REGEX);
402409
if (multiValues.length == 1) {
403-
rowValues.add(columnTypes.get(i).convert(multiValues[0].replace(ESCAPED_COMMA_SEQUENCE, ",")));
410+
rowValues.add(convert(multiValues[0].replace(ESCAPED_COMMA_SEQUENCE, ","), columnTypes.get(i)));
404411
continue;
405412
}
406413
List<Object> listOfMvValues = new ArrayList<>();
407414
for (String mvValue : multiValues) {
408415
try {
409-
listOfMvValues.add(columnTypes.get(i).convert(mvValue.trim().replace(ESCAPED_COMMA_SEQUENCE, ",")));
416+
listOfMvValues.add(convert(mvValue.trim().replace(ESCAPED_COMMA_SEQUENCE, ","), columnTypes.get(i)));
410417
} catch (IllegalArgumentException e) {
411418
throw new IllegalArgumentException(
412419
"Error parsing multi-value field ["
@@ -424,7 +431,7 @@ public static ExpectedResults loadCsvSpecValues(String csv) {
424431
} else {
425432
// The value considered here is the one where any potential escaped comma is kept as is (with the escape char)
426433
// TODO if we'd want escaped commas outside multi-values fields, we'd have to adjust this value here as well
427-
rowValues.add(columnTypes.get(i).convert(value));
434+
rowValues.add(convert(value, columnTypes.get(i)));
428435
}
429436
}
430437
values.add(rowValues);
@@ -436,6 +443,17 @@ public static ExpectedResults loadCsvSpecValues(String csv) {
436443
}
437444
}
438445

446+
private static Object convert(String value, Type type) {
447+
if (Number.class.isAssignableFrom(type.clazz()) && value.startsWith("<") && value.endsWith(">") && value.contains("-")) {
448+
// Numbers of the form "<lower-upper>" are parsed to a Range.
449+
int separator = value.indexOf('-');
450+
Object lowerBound = type.converter.apply(value.substring(1, separator).trim());
451+
Object upperBound = type.converter.apply(value.substring(separator + 1, value.length() - 1).trim());
452+
return new Range(lowerBound, upperBound);
453+
}
454+
return type.converter.apply(value);
455+
}
456+
439457
private static final String TYPECAST_SPACER = "__TYPECAST__";
440458

441459
private static String escapeTypecast(String typecast) {

x-pack/plugin/esql/qa/testFixtures/src/main/resources/sample.csv-spec

Lines changed: 24 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,6 @@
55
// they also assert the value of MV_COUNT(VALUES(...)), which is not adjusted for
66
// the sampling and therefore gives the size of the sample.
77
// All ranges are very loose, so that the tests should practically never fail.
8-
// The range checks are done in ES|QL, resulting in one boolean value (is_expected),
9-
// because the CSV tests don't support such assertions.
108

119
row
1210
required_capability: sample_v3
@@ -40,14 +38,10 @@ required_capability: sample_v3
4038
FROM employees
4139
| SAMPLE 0.5
4240
| STATS count = COUNT(), avg_emp_no = AVG(emp_no), sum_emp_no = SUM(emp_no)
43-
| EVAL is_expected = count >= 10 AND count <= 90 AND
44-
avg_emp_no > 10010 AND avg_emp_no < 10090 AND
45-
sum_emp_no > 10*10010 AND sum_emp_no < 90*10090
46-
| KEEP is_expected
4741
;
4842

49-
is_expected:boolean
50-
true
43+
count:long | avg_emp_no:double | sum_emp_no:long
44+
<10-90> | <10010-10090> | <100100-908100>
5145
;
5246

5347

@@ -58,13 +52,10 @@ FROM employees
5852
| SAMPLE 0.5
5953
| WHERE emp_no > 10050
6054
| STATS count = COUNT(), avg_emp_no = AVG(emp_no)
61-
| EVAL is_expected = count >= 2 AND count <= 48 AND
62-
avg_emp_no > 10055 AND avg_emp_no < 10095
63-
| KEEP is_expected
6455
;
6556

66-
is_expected:boolean
67-
true
57+
count:long | avg_emp_no:double
58+
<2-48> | <10055-10095>
6859
;
6960

7061

@@ -75,13 +66,10 @@ FROM employees
7566
| WHERE emp_no <= 10050
7667
| SAMPLE 0.5
7768
| STATS count = COUNT(), avg_emp_no = AVG(emp_no)
78-
| EVAL is_expected = count >= 2 AND count <= 48 AND
79-
avg_emp_no > 10005 AND avg_emp_no < 10045
80-
| KEEP is_expected
8169
;
8270

83-
is_expected:boolean
84-
true
71+
count:long | avg_emp_no:double
72+
<2-48> | <10005-10045>
8573
;
8674

8775

@@ -92,13 +80,10 @@ FROM employees
9280
| SAMPLE 0.5
9381
| SORT emp_no
9482
| STATS count = COUNT(), avg_emp_no = AVG(emp_no)
95-
| EVAL is_expected = count >= 10 AND count <= 90 AND
96-
avg_emp_no > 10010 AND avg_emp_no < 10090
97-
| KEEP is_expected
9883
;
9984

100-
is_expected:boolean
101-
true
85+
count:long | avg_emp_no:double
86+
<10-90> | <10010-10090>
10287
;
10388

10489

@@ -109,13 +94,10 @@ FROM employees
10994
| SORT emp_no
11095
| SAMPLE 0.5
11196
| STATS count = COUNT(), avg_emp_no = AVG(emp_no)
112-
| EVAL is_expected = count >= 10 AND count <= 90 AND
113-
avg_emp_no > 10010 AND avg_emp_no < 10090
114-
| KEEP is_expected
11597
;
11698

117-
is_expected:boolean
118-
true
99+
count:long | avg_emp_no:double
100+
<10-90> | <10010-10090>
119101
;
120102

121103

@@ -126,12 +108,10 @@ FROM employees
126108
| SAMPLE 0.5
127109
| LIMIT 10
128110
| STATS count = COUNT(emp_no)
129-
| EVAL is_expected = count == 10
130-
| KEEP is_expected
131111
;
132112

133-
is_expected:boolean
134-
true
113+
count:long
114+
10
135115
;
136116

137117

@@ -142,12 +122,10 @@ FROM employees
142122
| LIMIT 50
143123
| SAMPLE 0.5
144124
| STATS count = COUNT(emp_no)
145-
| EVAL is_expected = count >= 2 AND count <= 48
146-
| KEEP is_expected
147125
;
148126

149-
is_expected:boolean
150-
true
127+
count:long
128+
<2-48>
151129
;
152130

153131

@@ -160,12 +138,11 @@ ROW x = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27
160138
| MV_EXPAND y
161139
| STATS count = COUNT() BY x
162140
| STATS counts = VALUES(count)
163-
| EVAL is_expected = MV_COUNT(counts) == 1 AND MV_MIN(counts) == 2
164-
| KEEP is_expected
141+
165142
;
166143

167-
is_expected:boolean
168-
true
144+
counts:long
145+
2
169146
;
170147

171148

@@ -177,13 +154,11 @@ ROW x = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27
177154
| MV_EXPAND y
178155
| SAMPLE 0.85
179156
| STATS count = COUNT() BY x
180-
| STATS counts = VALUES(count)
181-
| EVAL is_expected = MV_COUNT(counts) == 2 AND MV_MIN(counts) == 1 AND MV_MAX(counts) == 2
182-
| KEEP is_expected
157+
| STATS counts = MV_SORT(VALUES(count))
183158
;
184159

185-
is_expected:boolean
186-
true
160+
counts:long
161+
[1,2]
187162
;
188163

189164

@@ -195,13 +170,10 @@ FROM employees
195170
| SAMPLE 0.8
196171
| SAMPLE 0.9
197172
| STATS count = COUNT(), avg_emp_no = AVG(emp_no)
198-
| EVAL is_expected = count >= 10 AND count <= 90 AND
199-
avg_emp_no > 10010 AND avg_emp_no < 10090
200-
| KEEP is_expected
201173
;
202174

203-
is_expected:boolean
204-
true
175+
count:long | avg_emp_no:double
176+
<10-90> | <10010-10090>
205177
;
206178

207179

@@ -213,13 +185,10 @@ FROM employees
213185
| STATS avg_salary = AVG(salary) BY job_positions
214186
| SAMPLE 0.8
215187
| STATS count = COUNT(), avg_avg_salary = AVG(avg_salary)
216-
| EVAL is_expected = count >= 1 AND count <= 16 AND
217-
avg_avg_salary > 25000 AND avg_avg_salary < 75000
218-
| KEEP is_expected
219188
;
220189

221-
is_expected:boolean
222-
true
190+
count:long | avg_avg_salary:double
191+
<1-16> | <25000-75000>
223192
;
224193

225194

0 commit comments

Comments
 (0)