|
1 | 1 | // Tests focused on the SAMPLE command |
| 2 | +// Note: this tests only basic behavior, because of limitations of the CSV tests. |
| 3 | +// Most tests assert that the count, average and sum of some values are within a |
| 4 | +// range. These stats should be correctly adjusted for the sampling. The tests |
| 5 | +// also assert the value of MV_COUNT(VALUES(...)), which is not adjusted for |
| 6 | +// the sampling and therefore gives the size of the sample. |
| 7 | +// All ranges are very loose, so that a test should fail less than once in a billion runs. |
| 8 | +// The range checks are done in ES|QL, resulting in one boolean value (is_expected), |
| 9 | +// because the CSV tests don't support such assertions. |
2 | 10 |
|
3 | 11 | row |
4 | 12 | required_capability: sample |
5 | 13 |
|
6 | | -ROW x = 1 |
7 | | -| SAMPLE .99 9 |
| 14 | +ROW x = 1 | SAMPLE .999999999 |
8 | 15 | ; |
9 | 16 |
|
10 | 17 | x:integer |
11 | 18 | 1 |
12 | 19 | ; |
13 | 20 |
|
14 | | -topN_PushedDown |
| 21 | + |
| 22 | +row and mv_expand |
15 | 23 | required_capability: sample |
16 | 24 |
|
17 | | -FROM employees |
18 | | -| SAMPLE .5 0 |
19 | | -| SORT emp_no |
20 | | -| LIMIT 5 |
21 | | -| STATS empNoMax = MAX(emp_no) |
22 | | -| EVAL correct = empNoMax < 10030 |
23 | | -| KEEP correct |
| 25 | +ROW x = [1,2,3,4,5] | MV_EXPAND x | SAMPLE .999999999 |
24 | 26 | ; |
25 | 27 |
|
26 | | -correct:boolean |
27 | | -true |
| 28 | +x:integer |
| 29 | +1 |
| 30 | +2 |
| 31 | +3 |
| 32 | +4 |
| 33 | +5 |
28 | 34 | ; |
29 | 35 |
|
30 | | -topN_Operator |
| 36 | + |
| 37 | +adjust stats for sampling |
31 | 38 | required_capability: sample |
32 | 39 |
|
33 | 40 | FROM employees |
34 | | -| WHERE LENGTH(CONCAT(last_name, "foo")) > 3 |
35 | | -| SAMPLE .5 0 |
36 | | -| SORT emp_no |
37 | | -| LIMIT 5 |
38 | | -| STATS empNoMax = MAX(emp_no) |
39 | | -| EVAL correct = empNoMax < 10030 |
40 | | -| KEEP correct |
| 41 | + | SAMPLE 0.5 |
| 42 | + | STATS count = COUNT(), values_count = MV_COUNT(VALUES(emp_no)), avg_emp_no = AVG(emp_no), sum_emp_no = SUM(emp_no) |
| 43 | + | EVAL is_expected = count >= 40 AND count <= 160 AND |
| 44 | + values_count >= 20 AND values_count <= 80 AND |
| 45 | + avg_emp_no > 10010 AND avg_emp_no < 10090 AND |
| 46 | + sum_emp_no > 40*10010 AND sum_emp_no < 160*10090 |
| 47 | + | KEEP is_expected |
41 | 48 | ; |
42 | 49 |
|
43 | | -correct:boolean |
| 50 | +is_expected:boolean |
44 | 51 | true |
45 | 52 | ; |
46 | 53 |
|
47 | | -aggCount_PushedDown |
| 54 | + |
| 55 | +before where |
48 | 56 | required_capability: sample |
49 | 57 |
|
50 | 58 | FROM employees |
51 | | -| SAMPLE .5 123 |
52 | | -| STATS count = COUNT(*) |
53 | | -| EVAL correct = count > 50 AND count < 150 |
54 | | -| KEEP correct |
| 59 | + | SAMPLE 0.5 |
| 60 | + | WHERE emp_no > 10050 |
| 61 | + | STATS count = COUNT(), values_count = MV_COUNT(VALUES(emp_no)), avg_emp_no = AVG(emp_no) |
| 62 | + | EVAL is_expected = count >= 10 AND count <= 90 AND |
| 63 | + values_count >= 5 AND values_count <= 45 AND |
| 64 | + avg_emp_no > 10055 AND avg_emp_no < 10095 |
| 65 | + | KEEP is_expected |
55 | 66 | ; |
56 | 67 |
|
57 | | -correct:boolean |
| 68 | +is_expected:boolean |
58 | 69 | true |
59 | 70 | ; |
60 | 71 |
|
61 | | -aggCount_Operator |
| 72 | + |
| 73 | +after where |
62 | 74 | required_capability: sample |
63 | 75 |
|
64 | 76 | FROM employees |
65 | | -| WHERE LENGTH(CONCAT(last_name, "foo")) > 3 |
66 | | -| SAMPLE .5 123 |
67 | | -| STATS count = COUNT(*) |
68 | | -| EVAL correct = count > 50 AND count < 150 |
69 | | -| KEEP correct |
| 77 | + | WHERE emp_no <= 10050 |
| 78 | + | SAMPLE 0.5 |
| 79 | + | STATS count = COUNT(), values_count = MV_COUNT(VALUES(emp_no)), avg_emp_no = AVG(emp_no) |
| 80 | + | EVAL is_expected = count >= 10 AND count <= 90 AND |
| 81 | + values_count >= 5 AND values_count <= 45 AND |
| 82 | + avg_emp_no > 10005 AND avg_emp_no < 10045 |
| 83 | + | KEEP is_expected |
70 | 84 | ; |
71 | 85 |
|
72 | | -correct:boolean |
| 86 | +is_expected:boolean |
73 | 87 | true |
74 | 88 | ; |
75 | 89 |
|
76 | | -aggCountNoSeed_PushedDown |
| 90 | + |
| 91 | +before sort |
77 | 92 | required_capability: sample |
78 | 93 |
|
79 | 94 | FROM employees |
80 | | -| SAMPLE .5 |
81 | | -| STATS count = COUNT(*) |
82 | | -| EVAL correct = count > 50 AND count < 150 |
83 | | -| KEEP correct |
| 95 | + | SAMPLE 0.5 |
| 96 | + | SORT emp_no |
| 97 | + | STATS count = COUNT(), values_count = MV_COUNT(VALUES(emp_no)), avg_emp_no = AVG(emp_no) |
| 98 | + | EVAL is_expected = count >= 40 AND count <= 160 AND |
| 99 | + values_count >= 20 AND values_count <= 80 AND |
| 100 | + avg_emp_no > 10010 AND avg_emp_no < 10090 |
| 101 | + | KEEP is_expected |
84 | 102 | ; |
85 | 103 |
|
86 | | -correct:boolean |
| 104 | +is_expected:boolean |
87 | 105 | true |
88 | 106 | ; |
89 | 107 |
|
90 | | -aggCountNoSeed_Operator |
| 108 | + |
| 109 | +after sort |
91 | 110 | required_capability: sample |
92 | 111 |
|
93 | 112 | FROM employees |
94 | | -| WHERE LENGTH(CONCAT(last_name, "foo")) > 3 |
95 | | -| SAMPLE .5 |
96 | | -| STATS count = COUNT(*) |
97 | | -| EVAL inRange = count > 50 AND count < 150 |
98 | | -| KEEP inRange |
| 113 | + | SORT emp_no |
| 114 | + | SAMPLE 0.5 |
| 115 | + | STATS count = COUNT(), values_count = MV_COUNT(VALUES(emp_no)), avg_emp_no = AVG(emp_no) |
| 116 | + | EVAL is_expected = count >= 40 AND count <= 160 AND |
| 117 | + values_count >= 20 AND values_count <= 80 AND |
| 118 | + avg_emp_no > 10010 AND avg_emp_no < 10090 |
| 119 | + | KEEP is_expected |
99 | 120 | ; |
100 | 121 |
|
101 | | -inRange:boolean |
| 122 | +is_expected:boolean |
102 | 123 | true |
103 | 124 | ; |
104 | 125 |
|
105 | | -withPostFilter_PushDown |
| 126 | + |
| 127 | +before limit |
106 | 128 | required_capability: sample |
107 | 129 |
|
108 | 130 | FROM employees |
109 | | -| SAMPLE .5 0 |
110 | | -| WHERE emp_no > 10050 |
111 | | -| STATS count = COUNT(*) |
112 | | -| EVAL correct = count > 20 AND count < 80 |
113 | | -| KEEP correct |
| 131 | + | SAMPLE 0.5 |
| 132 | + | LIMIT 20 |
| 133 | + | STATS count = COUNT(), values_count = MV_COUNT(VALUES(emp_no)), avg_emp_no = AVG(emp_no) |
| 134 | + | EVAL is_expected = count == 20 AND |
| 135 | + values_count == 20 AND |
| 136 | + avg_emp_no > 10005 AND avg_emp_no < 10045 |
| 137 | + | KEEP is_expected |
114 | 138 | ; |
115 | 139 |
|
116 | | -correct:boolean |
| 140 | +is_expected:boolean |
117 | 141 | true |
118 | 142 | ; |
119 | 143 |
|
120 | | -withPostFilter_Operator |
| 144 | + |
| 145 | +after limit |
121 | 146 | required_capability: sample |
122 | 147 |
|
123 | 148 | FROM employees |
124 | | -| WHERE LENGTH(CONCAT(last_name, "foo")) > 3 |
125 | | -| SAMPLE .5 0 |
126 | | -| WHERE emp_no > 10050 |
127 | | -| STATS count = COUNT(*) |
128 | | -| EVAL correct = count > 20 AND count < 80 |
129 | | -| KEEP correct |
| 149 | + | LIMIT 50 |
| 150 | + | SAMPLE 0.5 |
| 151 | + | STATS count = COUNT(), values_count = MV_COUNT(VALUES(emp_no)), avg_emp_no = AVG(emp_no) |
| 152 | + | EVAL is_expected = count >= 10 AND count <= 90 AND |
| 153 | + values_count >= 5 AND values_count <= 45 AND |
| 154 | + avg_emp_no > 10005 AND avg_emp_no < 10045 |
| 155 | + | KEEP is_expected |
130 | 156 | ; |
131 | 157 |
|
132 | | -correct:boolean |
| 158 | +is_expected:boolean |
133 | 159 | true |
134 | 160 | ; |
135 | 161 |
|
136 | | -combined_PushDown |
| 162 | + |
| 163 | +before mv_expand |
137 | 164 | required_capability: sample |
138 | 165 |
|
139 | | -FROM employees |
140 | | -| SAMPLE .6 11 |
141 | | -| EVAL e1 = emp_no + 1 |
142 | | -| SAMPLE .7 11111 |
143 | | -| STATS count = COUNT(e1) |
144 | | -| EVAL correct = count > 50 AND count < 150 |
145 | | -| KEEP correct |
| 166 | +ROW x = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50], y = [1,2] |
| 167 | + | MV_EXPAND x |
| 168 | + | SAMPLE 0.85 |
| 169 | + | MV_EXPAND y |
| 170 | + | STATS count = COUNT() BY x |
| 171 | + | STATS counts = VALUES(count) |
| 172 | + | EVAL is_expected = MV_COUNT(counts) == 1 AND MV_MIN(counts) == 2 |
| 173 | + | KEEP is_expected |
146 | 174 | ; |
147 | 175 |
|
148 | | -correct:boolean |
| 176 | +is_expected:boolean |
149 | 177 | true |
150 | 178 | ; |
151 | 179 |
|
152 | | -combined_Operator |
| 180 | + |
| 181 | +after mv_expand |
153 | 182 | required_capability: sample |
154 | 183 |
|
155 | | -FROM employees |
156 | | -| WHERE LENGTH(CONCAT(last_name, "foo")) > 3 |
157 | | -| SAMPLE .6 11 |
158 | | -| EVAL e1 = emp_no + 1 |
159 | | -| SAMPLE .7 11111 |
160 | | -| STATS count = COUNT(e1) |
161 | | -| EVAL correct = count > 50 AND count < 150 |
162 | | -| KEEP correct |
| 184 | +ROW x = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50], y = [1,2] |
| 185 | + | MV_EXPAND x |
| 186 | + | MV_EXPAND y |
| 187 | + | SAMPLE 0.85 |
| 188 | + | STATS count = COUNT() BY x |
| 189 | + | STATS counts = VALUES(count) |
| 190 | + | EVAL is_expected = MV_COUNT(counts) == 2 AND MV_MIN(counts) == 1 AND MV_MAX(counts) == 2 |
| 191 | + | KEEP is_expected |
163 | 192 | ; |
164 | 193 |
|
165 | | -correct:boolean |
| 194 | +is_expected:boolean |
166 | 195 | true |
167 | 196 | ; |
168 | 197 |
|
169 | | -combined2_PushDown |
| 198 | + |
| 199 | +multiple samples |
170 | 200 | required_capability: sample |
171 | 201 |
|
172 | 202 | FROM employees |
173 | | -| SAMPLE .6 11 |
174 | | -| EVAL e1 = emp_no + 1 |
175 | | -| SAMPLE .7 11111 |
176 | | -| EVAL e1s = e1::KEYWORD |
177 | | -| SAMPLE .1 |
178 | | -| STATS count = COUNT(e1s) |
179 | | -| EVAL correct = count > 0 AND count < 250 |
180 | | -| KEEP correct |
| 203 | + | SAMPLE 0.7 |
| 204 | + | SAMPLE 0.8 |
| 205 | + | SAMPLE 0.9 |
| 206 | + | STATS count = COUNT(), values_count = MV_COUNT(VALUES(emp_no)), avg_emp_no = AVG(emp_no) |
| 207 | + | EVAL is_expected = count >= 40 AND count <= 160 AND |
| 208 | + values_count >= 20 AND values_count <= 80 AND |
| 209 | + avg_emp_no > 10010 AND avg_emp_no < 10090 |
| 210 | + | KEEP is_expected |
181 | 211 | ; |
182 | 212 |
|
183 | | -correct:boolean |
| 213 | +is_expected:boolean |
184 | 214 | true |
185 | 215 | ; |
186 | 216 |
|
187 | | -combined2_Operator |
| 217 | + |
| 218 | +after stats |
188 | 219 | required_capability: sample |
189 | 220 |
|
190 | 221 | FROM employees |
191 | | -| WHERE LENGTH(CONCAT(last_name, "foo")) > 3 |
192 | | -| SAMPLE .6 11 |
193 | | -| EVAL e1 = emp_no + 1 |
194 | | -| SAMPLE .7 11111 |
195 | | -| EVAL e1s = e1::KEYWORD |
196 | | -| SAMPLE .1 |
197 | | -| STATS count = COUNT(e1s) |
198 | | -| EVAL correct = count > 0 AND count < 250 |
199 | | -| KEEP correct |
200 | | -; |
201 | | - |
202 | | -correct:boolean |
| 222 | + | SAMPLE 0.5 |
| 223 | + | STATS avg_salary = AVG(salary) BY job_positions |
| 224 | + | SAMPLE 0.8 |
| 225 | + | STATS count = COUNT(), values_count = MV_COUNT(VALUES(avg_salary)), avg_avg_salary = AVG(avg_salary) |
| 226 | + | EVAL is_expected = count >= 1 AND count <= 20 AND |
| 227 | + values_count >= 1 AND values_count <= 16 AND |
| 228 | + avg_avg_salary > 25000 AND avg_avg_salary < 75000 |
| 229 | + | KEEP is_expected |
| 230 | +; |
| 231 | + |
| 232 | +is_expected:boolean |
203 | 233 | true |
204 | 234 | ; |
0 commit comments