@@ -75,8 +75,9 @@ public void testTimechartBasic() {
7575
7676 RelNode root = getRelNode (ppl );
7777 String expectedSparkSql =
78- "SELECT `SPAN`(`@timestamp`, 1, 'm') `@timestamp`, COUNT(*) `count`\n "
78+ "SELECT `SPAN`(`@timestamp`, 1, 'm') `@timestamp`, COUNT(*) `count() `\n "
7979 + "FROM `scott`.`events`\n "
80+ + "WHERE `@timestamp` IS NOT NULL\n "
8081 + "GROUP BY `SPAN`(`@timestamp`, 1, 'm')\n "
8182 + "ORDER BY 1 NULLS LAST" ;
8283 verifyPPLToSparkSQL (root , expectedSparkSql );
@@ -92,8 +93,9 @@ public void testTimechartPerSecond() {
9293 + "FROM (SELECT `SPAN`(`@timestamp`, 1, 'm') `@timestamp`, SUM(`cpu_usage`)"
9394 + " `per_second(cpu_usage)`\n "
9495 + "FROM `scott`.`events`\n "
96+ + "WHERE `@timestamp` IS NOT NULL AND `cpu_usage` IS NOT NULL\n "
9597 + "GROUP BY `SPAN`(`@timestamp`, 1, 'm')\n "
96- + "ORDER BY 1 NULLS LAST) `t2 `" );
98+ + "ORDER BY 1 NULLS LAST) `t3 `" );
9799 }
98100
99101 @ Test
@@ -106,8 +108,9 @@ public void testTimechartPerMinute() {
106108 + "FROM (SELECT `SPAN`(`@timestamp`, 1, 'm') `@timestamp`, SUM(`cpu_usage`)"
107109 + " `per_minute(cpu_usage)`\n "
108110 + "FROM `scott`.`events`\n "
111+ + "WHERE `@timestamp` IS NOT NULL AND `cpu_usage` IS NOT NULL\n "
109112 + "GROUP BY `SPAN`(`@timestamp`, 1, 'm')\n "
110- + "ORDER BY 1 NULLS LAST) `t2 `" );
113+ + "ORDER BY 1 NULLS LAST) `t3 `" );
111114 }
112115
113116 @ Test
@@ -120,8 +123,9 @@ public void testTimechartPerHour() {
120123 + "FROM (SELECT `SPAN`(`@timestamp`, 1, 'm') `@timestamp`, SUM(`cpu_usage`)"
121124 + " `per_hour(cpu_usage)`\n "
122125 + "FROM `scott`.`events`\n "
126+ + "WHERE `@timestamp` IS NOT NULL AND `cpu_usage` IS NOT NULL\n "
123127 + "GROUP BY `SPAN`(`@timestamp`, 1, 'm')\n "
124- + "ORDER BY 1 NULLS LAST) `t2 `" );
128+ + "ORDER BY 1 NULLS LAST) `t3 `" );
125129 }
126130
127131 @ Test
@@ -134,8 +138,9 @@ public void testTimechartPerDay() {
134138 + "FROM (SELECT `SPAN`(`@timestamp`, 1, 'm') `@timestamp`, SUM(`cpu_usage`)"
135139 + " `per_day(cpu_usage)`\n "
136140 + "FROM `scott`.`events`\n "
141+ + "WHERE `@timestamp` IS NOT NULL AND `cpu_usage` IS NOT NULL\n "
137142 + "GROUP BY `SPAN`(`@timestamp`, 1, 'm')\n "
138- + "ORDER BY 1 NULLS LAST) `t2 `" );
143+ + "ORDER BY 1 NULLS LAST) `t3 `" );
139144 }
140145
141146 @ Test
@@ -144,8 +149,9 @@ public void testTimechartWithSpan() {
144149
145150 RelNode root = getRelNode (ppl );
146151 String expectedSparkSql =
147- "SELECT `SPAN`(`@timestamp`, 1, 'h') `@timestamp`, COUNT(*) `count`\n "
152+ "SELECT `SPAN`(`@timestamp`, 1, 'h') `@timestamp`, COUNT(*) `count() `\n "
148153 + "FROM `scott`.`events`\n "
154+ + "WHERE `@timestamp` IS NOT NULL\n "
149155 + "GROUP BY `SPAN`(`@timestamp`, 1, 'h')\n "
150156 + "ORDER BY 1 NULLS LAST" ;
151157 verifyPPLToSparkSQL (root , expectedSparkSql );
@@ -157,49 +163,24 @@ public void testTimechartWithLimit() {
157163
158164 RelNode root = getRelNode (ppl );
159165 String expectedSparkSql =
160- "SELECT `@timestamp`, `host`, SUM(`actual_count`) `count`\n "
161- + "FROM (SELECT CAST(`t1`.`@timestamp` AS TIMESTAMP) `@timestamp`, CASE WHEN"
162- + " `t7`.`host` IS NOT NULL THEN `t1`.`host` ELSE CASE WHEN `t1`.`host` IS NULL THEN"
163- + " NULL ELSE 'OTHER' END END `host`, SUM(`t1`.`$f2_0`) `actual_count`\n "
164- + "FROM (SELECT `SPAN`(`@timestamp`, 1, 'm') `@timestamp`, `host`, COUNT(*) `$f2_0`\n "
166+ "SELECT `t2`.`@timestamp`, CASE WHEN `t2`.`host` IS NULL THEN 'NULL' WHEN"
167+ + " `t9`.`_row_number_chart_` <= 3 THEN `t2`.`host` ELSE 'OTHER' END `host`,"
168+ + " SUM(`t2`.`count()`) `count()`\n "
169+ + "FROM (SELECT `SPAN`(`@timestamp`, 1, 'm') `@timestamp`, `host`, COUNT(*) `count()`\n "
165170 + "FROM `scott`.`events`\n "
166- + "GROUP BY `host`, `SPAN`(`@timestamp`, 1, 'm')) `t1`\n "
167- + "LEFT JOIN (SELECT `host`, SUM(`$f2_0`) `grand_total`\n "
168- + "FROM (SELECT `SPAN`(`@timestamp`, 1, 'm') `@timestamp`, `host`, COUNT(*) `$f2_0`\n "
171+ + "WHERE `@timestamp` IS NOT NULL\n "
172+ + "GROUP BY `host`, `SPAN`(`@timestamp`, 1, 'm')) `t2`\n "
173+ + "LEFT JOIN (SELECT `host`, SUM(`count()`) `__grand_total__`, ROW_NUMBER() OVER (ORDER"
174+ + " BY SUM(`count()`) DESC) `_row_number_chart_`\n "
175+ + "FROM (SELECT `host`, COUNT(*) `count()`\n "
169176 + "FROM `scott`.`events`\n "
170- + "GROUP BY `host`, `SPAN`(`@timestamp`, 1, 'm')) `t4`\n "
177+ + "WHERE `@timestamp` IS NOT NULL\n "
178+ + "GROUP BY `host`, `SPAN`(`@timestamp`, 1, 'm')) `t6`\n "
171179 + "WHERE `host` IS NOT NULL\n "
172- + "GROUP BY `host`\n "
173- + "ORDER BY 2 DESC NULLS FIRST\n "
174- + "LIMIT 3) `t7` ON `t1`.`host` IS NOT DISTINCT FROM `t7`.`host`\n "
175- + "GROUP BY CAST(`t1`.`@timestamp` AS TIMESTAMP), CASE WHEN `t7`.`host` IS NOT NULL"
176- + " THEN `t1`.`host` ELSE CASE WHEN `t1`.`host` IS NULL THEN NULL ELSE 'OTHER' END"
177- + " END\n "
178- + "UNION\n "
179- + "SELECT CAST(`t13`.`@timestamp` AS TIMESTAMP) `@timestamp`, `t24`.`$f0` `host`, 0"
180- + " `count`\n "
181- + "FROM (SELECT `@timestamp`\n "
182- + "FROM (SELECT `SPAN`(`@timestamp`, 1, 'm') `@timestamp`\n "
183- + "FROM `scott`.`events`\n "
184- + "GROUP BY `host`, `SPAN`(`@timestamp`, 1, 'm')) `t12`\n "
185- + "GROUP BY `@timestamp`) `t13`\n "
186- + "CROSS JOIN (SELECT CASE WHEN `t22`.`host` IS NOT NULL THEN `t16`.`host` ELSE CASE"
187- + " WHEN `t16`.`host` IS NULL THEN NULL ELSE 'OTHER' END END `$f0`\n "
188- + "FROM (SELECT `SPAN`(`@timestamp`, 1, 'm') `@timestamp`, `host`, COUNT(*) `$f2_0`\n "
189- + "FROM `scott`.`events`\n "
190- + "GROUP BY `host`, `SPAN`(`@timestamp`, 1, 'm')) `t16`\n "
191- + "LEFT JOIN (SELECT `host`, SUM(`$f2_0`) `grand_total`\n "
192- + "FROM (SELECT `SPAN`(`@timestamp`, 1, 'm') `@timestamp`, `host`, COUNT(*) `$f2_0`\n "
193- + "FROM `scott`.`events`\n "
194- + "GROUP BY `host`, `SPAN`(`@timestamp`, 1, 'm')) `t19`\n "
195- + "WHERE `host` IS NOT NULL\n "
196- + "GROUP BY `host`\n "
197- + "ORDER BY 2 DESC NULLS FIRST\n "
198- + "LIMIT 3) `t22` ON `t16`.`host` IS NOT DISTINCT FROM `t22`.`host`\n "
199- + "GROUP BY CASE WHEN `t22`.`host` IS NOT NULL THEN `t16`.`host` ELSE CASE WHEN"
200- + " `t16`.`host` IS NULL THEN NULL ELSE 'OTHER' END END) `t24`) `t26`\n "
201- + "GROUP BY `@timestamp`, `host`\n "
202- + "ORDER BY `@timestamp` NULLS LAST, `host` NULLS LAST" ;
180+ + "GROUP BY `host`) `t9` ON `t2`.`host` = `t9`.`host`\n "
181+ + "GROUP BY `t2`.`@timestamp`, CASE WHEN `t2`.`host` IS NULL THEN 'NULL' WHEN"
182+ + " `t9`.`_row_number_chart_` <= 3 THEN `t2`.`host` ELSE 'OTHER' END\n "
183+ + "ORDER BY `t2`.`@timestamp` NULLS LAST, 2 NULLS LAST" ;
203184 verifyPPLToSparkSQL (root , expectedSparkSql );
204185 }
205186
@@ -208,49 +189,24 @@ public void testTimechartWithSpan1h() {
208189 String ppl = "source=events | timechart span=1h count() by host" ;
209190 RelNode root = getRelNode (ppl );
210191 String expectedSparkSql =
211- "SELECT `@timestamp`, `host`, SUM(`actual_count`) `count`\n "
212- + "FROM (SELECT CAST(`t1`.`@timestamp` AS TIMESTAMP) `@timestamp`, CASE WHEN"
213- + " `t7`.`host` IS NOT NULL THEN `t1`.`host` ELSE CASE WHEN `t1`.`host` IS NULL THEN"
214- + " NULL ELSE 'OTHER' END END `host`, SUM(`t1`.`$f2_0`) `actual_count`\n "
215- + "FROM (SELECT `SPAN`(`@timestamp`, 1, 'h') `@timestamp`, `host`, COUNT(*) `$f2_0`\n "
216- + "FROM `scott`.`events`\n "
217- + "GROUP BY `host`, `SPAN`(`@timestamp`, 1, 'h')) `t1`\n "
218- + "LEFT JOIN (SELECT `host`, SUM(`$f2_0`) `grand_total`\n "
219- + "FROM (SELECT `SPAN`(`@timestamp`, 1, 'h') `@timestamp`, `host`, COUNT(*) `$f2_0`\n "
220- + "FROM `scott`.`events`\n "
221- + "GROUP BY `host`, `SPAN`(`@timestamp`, 1, 'h')) `t4`\n "
222- + "WHERE `host` IS NOT NULL\n "
223- + "GROUP BY `host`\n "
224- + "ORDER BY 2 DESC NULLS FIRST\n "
225- + "LIMIT 10) `t7` ON `t1`.`host` IS NOT DISTINCT FROM `t7`.`host`\n "
226- + "GROUP BY CAST(`t1`.`@timestamp` AS TIMESTAMP), CASE WHEN `t7`.`host` IS NOT NULL"
227- + " THEN `t1`.`host` ELSE CASE WHEN `t1`.`host` IS NULL THEN NULL ELSE 'OTHER' END"
228- + " END\n "
229- + "UNION\n "
230- + "SELECT CAST(`t13`.`@timestamp` AS TIMESTAMP) `@timestamp`, `t24`.`$f0` `host`, 0"
231- + " `count`\n "
232- + "FROM (SELECT `@timestamp`\n "
233- + "FROM (SELECT `SPAN`(`@timestamp`, 1, 'h') `@timestamp`\n "
192+ "SELECT `t2`.`@timestamp`, CASE WHEN `t2`.`host` IS NULL THEN 'NULL' WHEN"
193+ + " `t9`.`_row_number_chart_` <= 10 THEN `t2`.`host` ELSE 'OTHER' END `host`,"
194+ + " SUM(`t2`.`count()`) `count()`\n "
195+ + "FROM (SELECT `SPAN`(`@timestamp`, 1, 'h') `@timestamp`, `host`, COUNT(*) `count()`\n "
234196 + "FROM `scott`.`events`\n "
235- + "GROUP BY `host`, `SPAN`(` @timestamp`, 1, 'h')) `t12` \n "
236- + "GROUP BY `@timestamp`) `t13 `\n "
237- + "CROSS JOIN (SELECT CASE WHEN `t22`.` host` IS NOT NULL THEN `t16`.`host` ELSE CASE "
238- + " WHEN `t16`.`host` IS NULL THEN NULL ELSE 'OTHER' END END `$f0 `\n "
239- + "FROM (SELECT `SPAN`(`@timestamp`, 1, 'h') `@timestamp`, ` host`, COUNT(*) `$f2_0 `\n "
197+ + "WHERE ` @timestamp` IS NOT NULL \n "
198+ + "GROUP BY `host`, `SPAN`(` @timestamp`, 1, 'h')) `t2 `\n "
199+ + "LEFT JOIN (SELECT ` host`, SUM(`count()`) `__grand_total__`, ROW_NUMBER() OVER (ORDER "
200+ + " BY SUM(`count()`) DESC) `_row_number_chart_ `\n "
201+ + "FROM (SELECT `host`, COUNT(*) `count() `\n "
240202 + "FROM `scott`.`events`\n "
241- + "GROUP BY `host`, `SPAN`(`@timestamp`, 1, 'h')) `t16`\n "
242- + "LEFT JOIN (SELECT `host`, SUM(`$f2_0`) `grand_total`\n "
243- + "FROM (SELECT `SPAN`(`@timestamp`, 1, 'h') `@timestamp`, `host`, COUNT(*) `$f2_0`\n "
244- + "FROM `scott`.`events`\n "
245- + "GROUP BY `host`, `SPAN`(`@timestamp`, 1, 'h')) `t19`\n "
203+ + "WHERE `@timestamp` IS NOT NULL\n "
204+ + "GROUP BY `host`, `SPAN`(`@timestamp`, 1, 'h')) `t6`\n "
246205 + "WHERE `host` IS NOT NULL\n "
247- + "GROUP BY `host`\n "
248- + "ORDER BY 2 DESC NULLS FIRST\n "
249- + "LIMIT 10) `t22` ON `t16`.`host` IS NOT DISTINCT FROM `t22`.`host`\n "
250- + "GROUP BY CASE WHEN `t22`.`host` IS NOT NULL THEN `t16`.`host` ELSE CASE WHEN"
251- + " `t16`.`host` IS NULL THEN NULL ELSE 'OTHER' END END) `t24`) `t26`\n "
252- + "GROUP BY `@timestamp`, `host`\n "
253- + "ORDER BY `@timestamp` NULLS LAST, `host` NULLS LAST" ;
206+ + "GROUP BY `host`) `t9` ON `t2`.`host` = `t9`.`host`\n "
207+ + "GROUP BY `t2`.`@timestamp`, CASE WHEN `t2`.`host` IS NULL THEN 'NULL' WHEN"
208+ + " `t9`.`_row_number_chart_` <= 10 THEN `t2`.`host` ELSE 'OTHER' END\n "
209+ + "ORDER BY `t2`.`@timestamp` NULLS LAST, 2 NULLS LAST" ;
254210 verifyPPLToSparkSQL (root , expectedSparkSql );
255211 }
256212
@@ -259,25 +215,25 @@ public void testTimechartWithSpan1m() {
259215 String ppl = "source=events | timechart span=1m avg(cpu_usage) by region" ;
260216 RelNode root = getRelNode (ppl );
261217 String expectedSparkSql =
262- "SELECT `t1 `.`@timestamp`, CASE WHEN `t7 `.`region` IS NOT NULL THEN `t1`.`region` ELSE CASE "
263- + " WHEN `t1 `.`region` IS NULL THEN NULL ELSE 'OTHER' END END `region`, AVG(`t1`.`$f2`) "
264- + " `avg(cpu_usage)`\n "
218+ "SELECT `t2 `.`@timestamp`, CASE WHEN `t2 `.`region` IS NULL THEN 'NULL' WHEN "
219+ + " `t9 `.`_row_number_chart_` <= 10 THEN `t2`.`region` ELSE 'OTHER' END `region`,"
220+ + " AVG(`t2`.`avg(cpu_usage)`) `avg(cpu_usage)`\n "
265221 + "FROM (SELECT `SPAN`(`@timestamp`, 1, 'm') `@timestamp`, `region`, AVG(`cpu_usage`)"
266- + " `$f2 `\n "
222+ + " `avg(cpu_usage) `\n "
267223 + "FROM `scott`.`events`\n "
268- + "GROUP BY `region`, `SPAN`(`@timestamp`, 1, 'm')) `t1`\n "
269- + "LEFT JOIN (SELECT `region`, AVG(`$f2`) `grand_total`\n "
270- + "FROM (SELECT `SPAN`(`@timestamp`, 1, 'm') `@timestamp`, `region`, AVG(`cpu_usage`)"
271- + " `$f2`\n "
224+ + "WHERE `@timestamp` IS NOT NULL AND `cpu_usage` IS NOT NULL\n "
225+ + "GROUP BY `region`, `SPAN`(`@timestamp`, 1, 'm')) `t2`\n "
226+ + "LEFT JOIN (SELECT `region`, SUM(`avg(cpu_usage)`) `__grand_total__`, ROW_NUMBER()"
227+ + " OVER (ORDER BY SUM(`avg(cpu_usage)`) DESC) `_row_number_chart_`\n "
228+ + "FROM (SELECT `region`, AVG(`cpu_usage`) `avg(cpu_usage)`\n "
272229 + "FROM `scott`.`events`\n "
273- + "GROUP BY `region`, `SPAN`(`@timestamp`, 1, 'm')) `t4`\n "
230+ + "WHERE `@timestamp` IS NOT NULL AND `cpu_usage` IS NOT NULL\n "
231+ + "GROUP BY `region`, `SPAN`(`@timestamp`, 1, 'm')) `t6`\n "
274232 + "WHERE `region` IS NOT NULL\n "
275- + "GROUP BY `region`\n "
276- + "ORDER BY 2 DESC NULLS FIRST\n "
277- + "LIMIT 10) `t7` ON `t1`.`region` = `t7`.`region`\n "
278- + "GROUP BY `t1`.`@timestamp`, CASE WHEN `t7`.`region` IS NOT NULL THEN `t1`.`region`"
279- + " ELSE CASE WHEN `t1`.`region` IS NULL THEN NULL ELSE 'OTHER' END END\n "
280- + "ORDER BY `t1`.`@timestamp` NULLS LAST, 2 NULLS LAST" ;
233+ + "GROUP BY `region`) `t9` ON `t2`.`region` = `t9`.`region`\n "
234+ + "GROUP BY `t2`.`@timestamp`, CASE WHEN `t2`.`region` IS NULL THEN 'NULL' WHEN"
235+ + " `t9`.`_row_number_chart_` <= 10 THEN `t2`.`region` ELSE 'OTHER' END\n "
236+ + "ORDER BY `t2`.`@timestamp` NULLS LAST, 2 NULLS LAST" ;
281237 verifyPPLToSparkSQL (root , expectedSparkSql );
282238 }
283239
@@ -296,27 +252,26 @@ public void testTimechartWithLimitAndUseOtherFalse() {
296252
297253 RelNode root = getRelNode (ppl );
298254 String expectedSparkSql =
299- "SELECT `t1 `.`@timestamp`, CASE WHEN `t7 `.`host` IS NOT NULL THEN `t1`.`host` ELSE CASE "
300- + " WHEN `t1 `.`host` IS NULL THEN NULL ELSE 'OTHER' END END `host`, AVG(`t1`.`$f2`) "
301- + " `avg(cpu_usage)`\n "
255+ "SELECT `t2 `.`@timestamp`, CASE WHEN `t2 `.`host` IS NULL THEN 'NULL' WHEN "
256+ + " `t9 `.`_row_number_chart_` <= 3 THEN `t2`.`host` ELSE 'OTHER' END `host`,"
257+ + " AVG(`t2`.`avg(cpu_usage)`) `avg(cpu_usage)`\n "
302258 + "FROM (SELECT `SPAN`(`@timestamp`, 1, 'h') `@timestamp`, `host`, AVG(`cpu_usage`)"
303- + " `$f2 `\n "
259+ + " `avg(cpu_usage) `\n "
304260 + "FROM `scott`.`events`\n "
305- + "GROUP BY `host`, `SPAN`(`@timestamp`, 1, 'h')) `t1`\n "
306- + "LEFT JOIN (SELECT `host`, AVG(`$f2`) `grand_total`\n "
307- + "FROM (SELECT `SPAN`(`@timestamp`, 1, 'h') `@timestamp`, `host`, AVG(`cpu_usage`)"
308- + " `$f2`\n "
261+ + "WHERE `@timestamp` IS NOT NULL AND `cpu_usage` IS NOT NULL\n "
262+ + "GROUP BY `host`, `SPAN`(`@timestamp`, 1, 'h')) `t2`\n "
263+ + "LEFT JOIN (SELECT `host`, SUM(`avg(cpu_usage)`) `__grand_total__`, ROW_NUMBER() OVER"
264+ + " (ORDER BY SUM(`avg(cpu_usage)`) DESC) `_row_number_chart_`\n "
265+ + "FROM (SELECT `host`, AVG(`cpu_usage`) `avg(cpu_usage)`\n "
309266 + "FROM `scott`.`events`\n "
310- + "GROUP BY `host`, `SPAN`(`@timestamp`, 1, 'h')) `t4`\n "
267+ + "WHERE `@timestamp` IS NOT NULL AND `cpu_usage` IS NOT NULL\n "
268+ + "GROUP BY `host`, `SPAN`(`@timestamp`, 1, 'h')) `t6`\n "
311269 + "WHERE `host` IS NOT NULL\n "
312- + "GROUP BY `host`\n "
313- + "ORDER BY 2 DESC NULLS FIRST\n "
314- + "LIMIT 3) `t7` ON `t1`.`host` = `t7`.`host`\n "
315- + "GROUP BY `t1`.`@timestamp`, CASE WHEN `t7`.`host` IS NOT NULL THEN `t1`.`host` ELSE"
316- + " CASE WHEN `t1`.`host` IS NULL THEN NULL ELSE 'OTHER' END END\n "
317- + "HAVING CASE WHEN `t7`.`host` IS NOT NULL THEN `t1`.`host` ELSE CASE WHEN `t1`.`host`"
318- + " IS NULL THEN NULL ELSE 'OTHER' END END <> 'OTHER'\n "
319- + "ORDER BY `t1`.`@timestamp` NULLS LAST, 2 NULLS LAST" ;
270+ + "GROUP BY `host`) `t9` ON `t2`.`host` = `t9`.`host`\n "
271+ + "WHERE `t9`.`_row_number_chart_` <= 3\n "
272+ + "GROUP BY `t2`.`@timestamp`, CASE WHEN `t2`.`host` IS NULL THEN 'NULL' WHEN"
273+ + " `t9`.`_row_number_chart_` <= 3 THEN `t2`.`host` ELSE 'OTHER' END\n "
274+ + "ORDER BY `t2`.`@timestamp` NULLS LAST, 2 NULLS LAST" ;
320275 verifyPPLToSparkSQL (root , expectedSparkSql );
321276 }
322277
0 commit comments