Skip to content

Commit 00c5efa

Browse files
committed
CSV tests
1 parent 986aa21 commit 00c5efa

File tree

8 files changed

+500685
-12
lines changed

8 files changed

+500685
-12
lines changed

x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvSpecReader.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ public static class CsvSpecParser implements SpecReader.Parser {
3131
private final StringBuilder query = new StringBuilder();
3232
private final StringBuilder data = new StringBuilder();
3333
private final List<String> requiredCapabilities = new ArrayList<>();
34+
private boolean approximate = false;
3435
private CsvTestCase testCase;
3536

3637
private CsvSpecParser() {}
@@ -44,6 +45,8 @@ public Object parse(String line) {
4445
earlySchema.append(line.substring(SCHEMA_PREFIX.length()).trim());
4546
} else if (line.toLowerCase(Locale.ROOT).startsWith("required_capability:")) {
4647
requiredCapabilities.add(line.substring("required_capability:".length()).trim());
48+
} else if (line.toLowerCase(Locale.ROOT).startsWith("approximate:")) {
49+
approximate = Boolean.parseBoolean(line.substring("approximate:".length()).trim());
4750
} else {
4851
if (line.endsWith(";")) {
4952
// pick up the query
@@ -52,6 +55,7 @@ public Object parse(String line) {
5255
testCase.query = query.toString();
5356
testCase.earlySchema = earlySchema.toString();
5457
testCase.requiredCapabilities = List.copyOf(requiredCapabilities);
58+
testCase.approximate = approximate;
5559
requiredCapabilities.clear();
5660
earlySchema.setLength(0);
5761
query.setLength(0);
@@ -116,6 +120,7 @@ public static class CsvTestCase {
116120
private final List<Pattern> expectedWarningsRegex = new ArrayList<>();
117121
public boolean ignoreOrder;
118122
public List<String> requiredCapabilities = List.of();
123+
public boolean approximate = false;
119124

120125
/**
121126
* Returns the warning headers expected to be added by the test. To declare such a header, use the `warning:definition` format

x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,7 @@ public class CsvTestsDataLoader {
155155
private static final TestDataset MV_TEXT = new TestDataset("mv_text");
156156
private static final TestDataset DENSE_VECTOR = new TestDataset("dense_vector");
157157
private static final TestDataset COLORS = new TestDataset("colors");
158+
private static final TestDataset MANY_NUMBERS = new TestDataset("many_numbers");
158159

159160
public static final Map<String, TestDataset> CSV_DATASET_MAP = Map.ofEntries(
160161
Map.entry(EMPLOYEES.indexName, EMPLOYEES),
@@ -219,7 +220,8 @@ public class CsvTestsDataLoader {
219220
Map.entry(LOGS.indexName, LOGS),
220221
Map.entry(MV_TEXT.indexName, MV_TEXT),
221222
Map.entry(DENSE_VECTOR.indexName, DENSE_VECTOR),
222-
Map.entry(COLORS.indexName, COLORS)
223+
Map.entry(COLORS.indexName, COLORS),
224+
Map.entry(MANY_NUMBERS.indexName, MANY_NUMBERS)
223225
);
224226

225227
private static final EnrichConfig LANGUAGES_ENRICH = new EnrichConfig("languages_policy", "enrich-policy-languages.json");
Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,159 @@
1+
// Tests focused on query approximation.
2+
// Note: these tests cover only basic behavior, because of limitations of the CSV tests.
3+
// Most tests assert that the count, average and sum of some values are within a
4+
// range. All ranges are very loose, so that the tests should practically never fail.
5+
// The range checks are done in ES|QL, resulting in one boolean value (is_expected),
6+
// because the CSV tests don't support such assertions.
7+
8+
approximate stats on large data
9+
approximate: true
10+
11+
FROM many_numbers
12+
| STATS count=COUNT(), avg=AVG(num), sum=SUM(num), min=MIN(num), max=MAX(num)
13+
| EVAL is_expected = count >= 450000 AND count <= 550000 AND
14+
avg >= 600 AND avg <= 750 AND
15+
sum >= 300000000 AND sum <= 380000000 AND
16+
min >= 0 AND min <= 100 AND
17+
max == 1000
18+
| KEEP is_expected
19+
;
20+
21+
is_expected:boolean
22+
true
23+
;
24+
25+
26+
exact stats on small data
27+
approximate: true
28+
29+
FROM many_numbers
30+
| WHERE num <= 25
31+
| STATS count=COUNT(), avg=AVG(num), sum=SUM(num), min=MIN(num), max=MAX(num)
32+
;
33+
34+
count:long | avg:double | sum:long | min:integer | max:integer
35+
325 | 17.0 | 5525 | 1 | 25
36+
;
37+
38+
39+
with where
40+
approximate: true
41+
42+
FROM many_numbers
43+
| WHERE num >= 500
44+
| STATS count=COUNT(), avg=AVG(num), sum=SUM(num), min=MIN(num), max=MAX(num)
45+
| EVAL is_expected = count >= 300000 AND count <= 450000 AND
46+
avg >= 700 AND avg <= 850 AND
47+
sum >= 270000000 AND sum <= 310000000 AND
48+
min >= 500 AND
49+
max == 1000
50+
| KEEP is_expected
51+
;
52+
53+
is_expected:boolean
54+
true
55+
;
56+
57+
58+
with sample
59+
approximate: true
60+
61+
FROM many_numbers
62+
| SAMPLE 0.5
63+
| STATS count=COUNT(), avg=AVG(num), sum=SUM(num), min=MIN(num), max=MAX(num)
64+
| EVAL is_expected = count >= 200000 AND count <= 300000 AND
65+
avg >= 600 AND avg <= 750 AND
66+
sum >= 140000000 AND sum <= 200000000 AND
67+
min >= 0 AND min <= 100 AND
68+
max == 1000
69+
| KEEP is_expected
70+
;
71+
72+
is_expected:boolean
73+
true
74+
;
75+
76+
77+
with commands inbetween
78+
approximate: true
79+
80+
FROM many_numbers
81+
| EVAL num2 = 2 * num
82+
| DROP num
83+
| SORT num2 DESC
84+
| RENAME num2 AS num3
85+
| EVAL num4 = TO_STRING(num3)
86+
| GROK num4 "%{NUMBER:num5}"
87+
| EVAL num5 = TO_INTEGER(num5)
88+
| KEEP num3, num5
89+
| STATS count=COUNT(), avg=AVG(num5), sum=SUM(num5), min=MIN(num5), max=MAX(num5)
90+
| EVAL is_expected = count >= 450000 AND count <= 550000 AND
91+
avg >= 1200 AND avg <= 1500 AND
92+
sum >= 600000000 AND sum <= 760000000 AND
93+
min >= 0 AND min <= 200 AND
94+
max == 2000
95+
| KEEP is_expected
96+
;
97+
98+
is_expected:boolean
99+
true
100+
;
101+
102+
103+
with commands after
104+
approximate: true
105+
106+
FROM many_numbers
107+
| STATS count=COUNT(), avg=AVG(num), sum=SUM(num), min=MIN(num), max=MAX(num)
108+
| EVAL avg2 = 2 * avg
109+
| LIMIT 10
110+
| MV_EXPAND min
111+
| SORT count ASC
112+
| EVAL is_expected = count >= 450000 AND count <= 550000 AND
113+
avg2 >= 1200 AND avg2 <= 1500 AND
114+
sum >= 300000000 AND sum <= 380000000 AND
115+
min >= 0 AND min <= 100 AND
116+
max == 1000
117+
| KEEP is_expected
118+
;
119+
120+
is_expected:boolean
121+
true
122+
;
123+
124+
125+
approximate stats by on large data
126+
approximate: true
127+
128+
FROM many_numbers
129+
| STATS count=COUNT() BY num
130+
| SORT num DESC
131+
| LIMIT 5
132+
| EVAL is_expected = count >= 100 AND count <= 2000
133+
| KEEP num, is_expected
134+
;
135+
136+
num:integer | is_expected:boolean
137+
1000 | true
138+
999 | true
139+
998 | true
140+
997 | true
141+
996 | true
142+
;
143+
144+
145+
exact stats by on small data
146+
approximate: true
147+
148+
FROM many_numbers
149+
| WHERE num <= 5
150+
| STATS count=COUNT() BY num
151+
;
152+
153+
count:long | num:integer
154+
1 | 1
155+
2 | 2
156+
3 | 3
157+
4 | 4
158+
5 | 5
159+
;

0 commit comments

Comments
 (0)