Skip to content

Commit 05f1151

Browse files
committed
CSV tests
1 parent a25940c commit 05f1151

File tree

3 files changed

+136
-106
lines changed

3 files changed

+136
-106
lines changed

x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvAssert.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -280,7 +280,7 @@ private static void dataFailure(
280280
fail(description + System.lineSeparator() + describeFailures(dataFailures) + actual + expected);
281281
}
282282

283-
private static final int MAX_ROWS = 25;
283+
private static final int MAX_ROWS = 1000;
284284

285285
private static String pipeTable(
286286
String description,
Lines changed: 134 additions & 104 deletions
Original file line numberDiff line numberDiff line change
@@ -1,204 +1,234 @@
11
// Tests focused on the SAMPLE command
2+
// Note: these tests cover only basic behavior, because of limitations of the CSV tests.
3+
// Most tests assert that the count, average and sum of some values are within a
4+
// range. These stats should be correctly adjusted for the sampling. Furthermore,
5+
// they also assert the value of MV_COUNT(VALUES(...)), which is not adjusted for
6+
// the sampling and therefore gives the size of the sample.
7+
// All ranges are very loose, so that each test should fail spuriously less than once in a billion runs.
8+
// The range checks are done in ES|QL, resulting in one boolean value (is_expected),
9+
// because the CSV tests don't support such assertions.
210

311
row
412
required_capability: sample
513

6-
ROW x = 1
7-
| SAMPLE .99 9
14+
ROW x = 1 | SAMPLE .999999999
815
;
916

1017
x:integer
1118
1
1219
;
1320

14-
topN_PushedDown
21+
22+
row and mv_expand
1523
required_capability: sample
1624

17-
FROM employees
18-
| SAMPLE .5 0
19-
| SORT emp_no
20-
| LIMIT 5
21-
| STATS empNoMax = MAX(emp_no)
22-
| EVAL correct = empNoMax < 10030
23-
| KEEP correct
25+
ROW x = [1,2,3,4,5] | MV_EXPAND x | SAMPLE .999999999
2426
;
2527

26-
correct:boolean
27-
true
28+
x:integer
29+
1
30+
2
31+
3
32+
4
33+
5
2834
;
2935

30-
topN_Operator
36+
37+
adjust stats for sampling
3138
required_capability: sample
3239

3340
FROM employees
34-
| WHERE LENGTH(CONCAT(last_name, "foo")) > 3
35-
| SAMPLE .5 0
36-
| SORT emp_no
37-
| LIMIT 5
38-
| STATS empNoMax = MAX(emp_no)
39-
| EVAL correct = empNoMax < 10030
40-
| KEEP correct
41+
| SAMPLE 0.5
42+
| STATS count = COUNT(), values_count = MV_COUNT(VALUES(emp_no)), avg_emp_no = AVG(emp_no), sum_emp_no = SUM(emp_no)
43+
| EVAL is_expected = count >= 40 AND count <= 160 AND
44+
values_count >= 20 AND values_count <= 80 AND
45+
avg_emp_no > 10010 AND avg_emp_no < 10090 AND
46+
sum_emp_no > 40*10010 AND sum_emp_no < 160*10090
47+
| KEEP is_expected
4148
;
4249

43-
correct:boolean
50+
is_expected:boolean
4451
true
4552
;
4653

47-
aggCount_PushedDown
54+
55+
before where
4856
required_capability: sample
4957

5058
FROM employees
51-
| SAMPLE .5 123
52-
| STATS count = COUNT(*)
53-
| EVAL correct = count > 50 AND count < 150
54-
| KEEP correct
59+
| SAMPLE 0.5
60+
| WHERE emp_no > 10050
61+
| STATS count = COUNT(), values_count = MV_COUNT(VALUES(emp_no)), avg_emp_no = AVG(emp_no)
62+
| EVAL is_expected = count >= 10 AND count <= 90 AND
63+
values_count >= 5 AND values_count <= 45 AND
64+
avg_emp_no > 10055 AND avg_emp_no < 10095
65+
| KEEP is_expected
5566
;
5667

57-
correct:boolean
68+
is_expected:boolean
5869
true
5970
;
6071

61-
aggCount_Operator
72+
73+
after where
6274
required_capability: sample
6375

6476
FROM employees
65-
| WHERE LENGTH(CONCAT(last_name, "foo")) > 3
66-
| SAMPLE .5 123
67-
| STATS count = COUNT(*)
68-
| EVAL correct = count > 50 AND count < 150
69-
| KEEP correct
77+
| WHERE emp_no <= 10050
78+
| SAMPLE 0.5
79+
| STATS count = COUNT(), values_count = MV_COUNT(VALUES(emp_no)), avg_emp_no = AVG(emp_no)
80+
| EVAL is_expected = count >= 10 AND count <= 90 AND
81+
values_count >= 5 AND values_count <= 45 AND
82+
avg_emp_no > 10005 AND avg_emp_no < 10045
83+
| KEEP is_expected
7084
;
7185

72-
correct:boolean
86+
is_expected:boolean
7387
true
7488
;
7589

76-
aggCountNoSeed_PushedDown
90+
91+
before sort
7792
required_capability: sample
7893

7994
FROM employees
80-
| SAMPLE .5
81-
| STATS count = COUNT(*)
82-
| EVAL correct = count > 50 AND count < 150
83-
| KEEP correct
95+
| SAMPLE 0.5
96+
| SORT emp_no
97+
| STATS count = COUNT(), values_count = MV_COUNT(VALUES(emp_no)), avg_emp_no = AVG(emp_no)
98+
| EVAL is_expected = count >= 40 AND count <= 160 AND
99+
values_count >= 20 AND values_count <= 80 AND
100+
avg_emp_no > 10010 AND avg_emp_no < 10090
101+
| KEEP is_expected
84102
;
85103

86-
correct:boolean
104+
is_expected:boolean
87105
true
88106
;
89107

90-
aggCountNoSeed_Operator
108+
109+
after sort
91110
required_capability: sample
92111

93112
FROM employees
94-
| WHERE LENGTH(CONCAT(last_name, "foo")) > 3
95-
| SAMPLE .5
96-
| STATS count = COUNT(*)
97-
| EVAL inRange = count > 50 AND count < 150
98-
| KEEP inRange
113+
| SORT emp_no
114+
| SAMPLE 0.5
115+
| STATS count = COUNT(), values_count = MV_COUNT(VALUES(emp_no)), avg_emp_no = AVG(emp_no)
116+
| EVAL is_expected = count >= 40 AND count <= 160 AND
117+
values_count >= 20 AND values_count <= 80 AND
118+
avg_emp_no > 10010 AND avg_emp_no < 10090
119+
| KEEP is_expected
99120
;
100121

101-
inRange:boolean
122+
is_expected:boolean
102123
true
103124
;
104125

105-
withPostFilter_PushDown
126+
127+
before limit
106128
required_capability: sample
107129

108130
FROM employees
109-
| SAMPLE .5 0
110-
| WHERE emp_no > 10050
111-
| STATS count = COUNT(*)
112-
| EVAL correct = count > 20 AND count < 80
113-
| KEEP correct
131+
| SAMPLE 0.5
132+
| LIMIT 20
133+
| STATS count = COUNT(), values_count = MV_COUNT(VALUES(emp_no)), avg_emp_no = AVG(emp_no)
134+
| EVAL is_expected = count == 20 AND
135+
values_count == 20 AND
136+
avg_emp_no > 10005 AND avg_emp_no < 10045
137+
| KEEP is_expected
114138
;
115139

116-
correct:boolean
140+
is_expected:boolean
117141
true
118142
;
119143

120-
withPostFilter_Operator
144+
145+
after limit
121146
required_capability: sample
122147

123148
FROM employees
124-
| WHERE LENGTH(CONCAT(last_name, "foo")) > 3
125-
| SAMPLE .5 0
126-
| WHERE emp_no > 10050
127-
| STATS count = COUNT(*)
128-
| EVAL correct = count > 20 AND count < 80
129-
| KEEP correct
149+
| LIMIT 50
150+
| SAMPLE 0.5
151+
| STATS count = COUNT(), values_count = MV_COUNT(VALUES(emp_no)), avg_emp_no = AVG(emp_no)
152+
| EVAL is_expected = count >= 10 AND count <= 90 AND
153+
values_count >= 5 AND values_count <= 45 AND
154+
avg_emp_no > 10005 AND avg_emp_no < 10045
155+
| KEEP is_expected
130156
;
131157

132-
correct:boolean
158+
is_expected:boolean
133159
true
134160
;
135161

136-
combined_PushDown
162+
163+
before mv_expand
137164
required_capability: sample
138165

139-
FROM employees
140-
| SAMPLE .6 11
141-
| EVAL e1 = emp_no + 1
142-
| SAMPLE .7 11111
143-
| STATS count = COUNT(e1)
144-
| EVAL correct = count > 50 AND count < 150
145-
| KEEP correct
166+
ROW x = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50], y = [1,2]
167+
| MV_EXPAND x
168+
| SAMPLE 0.85
169+
| MV_EXPAND y
170+
| STATS count = COUNT() BY x
171+
| STATS counts = VALUES(count)
172+
| EVAL is_expected = MV_COUNT(counts) == 1 AND MV_MIN(counts) == 2
173+
| KEEP is_expected
146174
;
147175

148-
correct:boolean
176+
is_expected:boolean
149177
true
150178
;
151179

152-
combined_Operator
180+
181+
after mv_expand
153182
required_capability: sample
154183

155-
FROM employees
156-
| WHERE LENGTH(CONCAT(last_name, "foo")) > 3
157-
| SAMPLE .6 11
158-
| EVAL e1 = emp_no + 1
159-
| SAMPLE .7 11111
160-
| STATS count = COUNT(e1)
161-
| EVAL correct = count > 50 AND count < 150
162-
| KEEP correct
184+
ROW x = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50], y = [1,2]
185+
| MV_EXPAND x
186+
| MV_EXPAND y
187+
| SAMPLE 0.85
188+
| STATS count = COUNT() BY x
189+
| STATS counts = VALUES(count)
190+
| EVAL is_expected = MV_COUNT(counts) == 2 AND MV_MIN(counts) == 1 AND MV_MAX(counts) == 2
191+
| KEEP is_expected
163192
;
164193

165-
correct:boolean
194+
is_expected:boolean
166195
true
167196
;
168197

169-
combined2_PushDown
198+
199+
multiple samples
170200
required_capability: sample
171201

172202
FROM employees
173-
| SAMPLE .6 11
174-
| EVAL e1 = emp_no + 1
175-
| SAMPLE .7 11111
176-
| EVAL e1s = e1::KEYWORD
177-
| SAMPLE .1
178-
| STATS count = COUNT(e1s)
179-
| EVAL correct = count > 0 AND count < 250
180-
| KEEP correct
203+
| SAMPLE 0.7
204+
| SAMPLE 0.8
205+
| SAMPLE 0.9
206+
| STATS count = COUNT(), values_count = MV_COUNT(VALUES(emp_no)), avg_emp_no = AVG(emp_no)
207+
| EVAL is_expected = count >= 40 AND count <= 160 AND
208+
values_count >= 20 AND values_count <= 80 AND
209+
avg_emp_no > 10010 AND avg_emp_no < 10090
210+
| KEEP is_expected
181211
;
182212

183-
correct:boolean
213+
is_expected:boolean
184214
true
185215
;
186216

187-
combined2_Operator
217+
218+
after stats
188219
required_capability: sample
189220

190221
FROM employees
191-
| WHERE LENGTH(CONCAT(last_name, "foo")) > 3
192-
| SAMPLE .6 11
193-
| EVAL e1 = emp_no + 1
194-
| SAMPLE .7 11111
195-
| EVAL e1s = e1::KEYWORD
196-
| SAMPLE .1
197-
| STATS count = COUNT(e1s)
198-
| EVAL correct = count > 0 AND count < 250
199-
| KEEP correct
200-
;
201-
202-
correct:boolean
222+
| SAMPLE 0.5
223+
| STATS avg_salary = AVG(salary) BY job_positions
224+
| SAMPLE 0.8
225+
| STATS count = COUNT(), values_count = MV_COUNT(VALUES(avg_salary)), avg_avg_salary = AVG(avg_salary)
226+
| EVAL is_expected = count >= 1 AND count <= 20 AND
227+
values_count >= 1 AND values_count <= 16 AND
228+
avg_avg_salary > 25000 AND avg_avg_salary < 75000
229+
| KEEP is_expected
230+
;
231+
232+
is_expected:boolean
203233
true
204234
;

x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -191,7 +191,7 @@ public class CsvTests extends ESTestCase {
191191

192192
@ParametersFactory(argumentFormatting = "%2$s.%3$s")
193193
public static List<Object[]> readScriptSpec() throws Exception {
194-
List<URL> urls = classpathResources("/*.csv-spec");
194+
List<URL> urls = classpathResources("/sample.csv-spec");
195195
assertThat("Not enough specs found " + urls, urls, hasSize(greaterThan(0)));
196196
return SpecReader.readScriptSpec(urls, specParser());
197197
}

0 commit comments

Comments
 (0)