|
| 1 | +-- |
| 2 | +-- Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
| 3 | +-- |
| 4 | +-- |
| 5 | +-- AGGREGATES [Part 1] |
| 6 | +-- https://github.com/postgres/postgres/blob/REL_12_BETA1/src/test/regress/sql/aggregates.sql#L1-L143 |
| 7 | + |
| 8 | +-- avoid bit-exact output here because operations may not be bit-exact. |
| 9 | +-- SET extra_float_digits = 0; |
| 10 | + |
| 11 | +-- This test file was converted from pgSQL/aggregates_part1.sql. |
| 12 | +-- Note that currently registered UDF returns a string. So there are some differences, for instance |
| 13 | +-- in string cast within UDF in Scala and Python. |
| 14 | + |
| 15 | +SELECT avg(udf(four)) AS avg_1 FROM onek; |
| 16 | + |
| 17 | +SELECT udf(avg(a)) AS avg_32 FROM aggtest WHERE a < 100; |
| 18 | + |
| 19 | +-- In 7.1, avg(float4) is computed using float8 arithmetic. |
| 20 | +-- Round the result to 3 digits to avoid platform-specific results. |
| 21 | + |
| 22 | +select CAST(avg(udf(b)) AS Decimal(10,3)) AS avg_107_943 FROM aggtest; |
| 23 | +-- `student` has a column with data type POINT, which is not supported by Spark [SPARK-27766] |
| 24 | +-- SELECT avg(gpa) AS avg_3_4 FROM ONLY student; |
| 25 | + |
| 26 | +SELECT sum(udf(four)) AS sum_1500 FROM onek; |
| 27 | +SELECT udf(sum(a)) AS sum_198 FROM aggtest; |
| 28 | +SELECT udf(udf(sum(b))) AS avg_431_773 FROM aggtest; |
| 29 | +-- `student` has a column with data type POINT, which is not supported by Spark [SPARK-27766] |
| 30 | +-- SELECT sum(gpa) AS avg_6_8 FROM ONLY student; |
| 31 | + |
| 32 | +SELECT udf(max(four)) AS max_3 FROM onek; |
| 33 | +SELECT max(udf(a)) AS max_100 FROM aggtest; |
| 34 | +SELECT CAST(udf(udf(max(aggtest.b))) AS int) AS max_324_78 FROM aggtest; |
| 35 | +-- `student` has a column with data type POINT, which is not supported by Spark [SPARK-27766] |
| 36 | +-- SELECT max(student.gpa) AS max_3_7 FROM student; |
| 37 | + |
| 38 | +SELECT CAST(stddev_pop(udf(b)) AS int) FROM aggtest; |
| 39 | +SELECT udf(stddev_samp(b)) FROM aggtest; |
| 40 | +SELECT CAST(var_pop(udf(b)) as int) FROM aggtest; |
| 41 | +SELECT udf(var_samp(b)) FROM aggtest; |
| 42 | + |
| 43 | +SELECT udf(stddev_pop(CAST(b AS Decimal(38,0)))) FROM aggtest; |
| 44 | +SELECT stddev_samp(CAST(udf(b) AS Decimal(38,0))) FROM aggtest; |
| 45 | +SELECT udf(var_pop(CAST(b AS Decimal(38,0)))) FROM aggtest; |
| 46 | +SELECT var_samp(udf(CAST(b AS Decimal(38,0)))) FROM aggtest; |
| 47 | + |
| 48 | +-- population variance is defined for a single tuple, sample variance |
| 49 | +-- is not |
| 50 | +SELECT udf(var_pop(1.0)), var_samp(udf(2.0)); |
| 51 | +SELECT stddev_pop(udf(CAST(3.0 AS Decimal(38,0)))), stddev_samp(CAST(udf(4.0) AS Decimal(38,0))); |
| 52 | + |
| 53 | + |
| 54 | +-- verify correct results for null and NaN inputs |
| 55 | +select sum(udf(CAST(null AS int))) from range(1,4); |
| 56 | +select sum(udf(CAST(null AS long))) from range(1,4); |
| 57 | +select sum(udf(CAST(null AS Decimal(38,0)))) from range(1,4); |
| 58 | +select sum(udf(CAST(null AS DOUBLE))) from range(1,4); |
| 59 | +select avg(udf(CAST(null AS int))) from range(1,4); |
| 60 | +select avg(udf(CAST(null AS long))) from range(1,4); |
| 61 | +select avg(udf(CAST(null AS Decimal(38,0)))) from range(1,4); |
| 62 | +select avg(udf(CAST(null AS DOUBLE))) from range(1,4); |
| 63 | +select sum(CAST(udf('NaN') AS DOUBLE)) from range(1,4); |
| 64 | +select avg(CAST(udf('NaN') AS DOUBLE)) from range(1,4); |
| 65 | + |
| 66 | +-- [SPARK-27768] verify correct results for infinite inputs |
| 67 | +-- [SPARK-28291] UDFs cannot be evaluated within inline table definition |
| 68 | +-- SELECT avg(CAST(x AS DOUBLE)), var_pop(CAST(x AS DOUBLE)) |
| 69 | +-- FROM (VALUES (CAST(udf('1') AS DOUBLE)), (CAST(udf('Infinity') AS DOUBLE))) v(x); |
| 70 | +SELECT avg(CAST(udf(x) AS DOUBLE)), var_pop(CAST(udf(x) AS DOUBLE)) |
| 71 | +FROM (VALUES ('Infinity'), ('1')) v(x); |
| 72 | +SELECT avg(CAST(udf(x) AS DOUBLE)), var_pop(CAST(udf(x) AS DOUBLE)) |
| 73 | +FROM (VALUES ('Infinity'), ('Infinity')) v(x); |
| 74 | +SELECT avg(CAST(udf(x) AS DOUBLE)), var_pop(CAST(udf(x) AS DOUBLE)) |
| 75 | +FROM (VALUES ('-Infinity'), ('Infinity')) v(x); |
| 76 | + |
| 77 | + |
| 78 | +-- test accuracy with a large input offset |
| 79 | +SELECT avg(udf(CAST(x AS DOUBLE))), udf(var_pop(CAST(x AS DOUBLE))) |
| 80 | +FROM (VALUES (100000003), (100000004), (100000006), (100000007)) v(x); |
| 81 | +SELECT avg(udf(CAST(x AS DOUBLE))), udf(var_pop(CAST(x AS DOUBLE))) |
| 82 | +FROM (VALUES (7000000000005), (7000000000007)) v(x); |
| 83 | + |
| 84 | +-- SQL2003 binary aggregates [SPARK-23907] |
| 85 | +-- SELECT regr_count(b, a) FROM aggtest; |
| 86 | +-- SELECT regr_sxx(b, a) FROM aggtest; |
| 87 | +-- SELECT regr_syy(b, a) FROM aggtest; |
| 88 | +-- SELECT regr_sxy(b, a) FROM aggtest; |
| 89 | +-- SELECT regr_avgx(b, a), regr_avgy(b, a) FROM aggtest; |
| 90 | +-- SELECT regr_r2(b, a) FROM aggtest; |
| 91 | +-- SELECT regr_slope(b, a), regr_intercept(b, a) FROM aggtest; |
| 92 | +SELECT CAST(udf(covar_pop(b, udf(a))) AS int), CAST(covar_samp(udf(b), a) as int) FROM aggtest; |
| 93 | +SELECT corr(b, udf(a)) FROM aggtest; |
| 94 | + |
| 95 | + |
| 96 | +-- test accum and combine functions directly [SPARK-23907] |
| 97 | +-- CREATE TABLE regr_test (x float8, y float8); |
| 98 | +-- INSERT INTO regr_test VALUES (10,150),(20,250),(30,350),(80,540),(100,200); |
| 99 | +-- SELECT count(*), sum(x), regr_sxx(y,x), sum(y),regr_syy(y,x), regr_sxy(y,x) |
| 100 | +-- FROM regr_test WHERE x IN (10,20,30,80); |
| 101 | +-- SELECT count(*), sum(x), regr_sxx(y,x), sum(y),regr_syy(y,x), regr_sxy(y,x) |
| 102 | +-- FROM regr_test; |
| 103 | +-- SELECT float8_accum('{4,140,2900}'::float8[], 100); |
| 104 | +-- SELECT float8_regr_accum('{4,140,2900,1290,83075,15050}'::float8[], 200, 100); |
| 105 | +-- SELECT count(*), sum(x), regr_sxx(y,x), sum(y),regr_syy(y,x), regr_sxy(y,x) |
| 106 | +-- FROM regr_test WHERE x IN (10,20,30); |
| 107 | +-- SELECT count(*), sum(x), regr_sxx(y,x), sum(y),regr_syy(y,x), regr_sxy(y,x) |
| 108 | +-- FROM regr_test WHERE x IN (80,100); |
| 109 | +-- SELECT float8_combine('{3,60,200}'::float8[],ELECT CAST(udf(covar_pop(b, udf(a))) AS '{0,0,0}'::float8[]); |
| 110 | +-- SELECT float8_combine('{0,0,0}'::float8[], '{2,180,200}'::float8[]); |
| 111 | +-- SELECT float8_combine('{3,60,200}'::float8[], '{2,180,200}'::float8[]); |
| 112 | +-- SELECT float8_regr_combine('{3,60,200,750,20000,2000}'::float8[], |
| 113 | +-- '{0,0,0,0,0,0}'::float8[]); |
| 114 | +-- SELECT float8_regr_combine('{0,0,0,0,0,0}'::float8[], |
| 115 | +-- '{2,180,200,740,57800,-3400}'::float8[]); |
| 116 | +-- SELECT float8_regr_combine('{3,60,200,750,20000,2000}'::float8[], |
| 117 | +-- '{2,180,200,740,57800,-3400}'::float8[]); |
| 118 | +-- DROP TABLE regr_test; |
| 119 | + |
| 120 | + |
| 121 | +-- test count, distinct |
| 122 | +SELECT count(udf(four)) AS cnt_1000 FROM onek; |
| 123 | +SELECT udf(count(DISTINCT four)) AS cnt_4 FROM onek; |
| 124 | + |
| 125 | +select ten, udf(count(*)), sum(udf(four)) from onek |
| 126 | +group by ten order by ten; |
| 127 | + |
| 128 | +select ten, count(udf(four)), udf(sum(DISTINCT four)) from onek |
| 129 | +group by ten order by ten; |
| 130 | + |
| 131 | +-- user-defined aggregates |
| 132 | +-- SELECT newavg(four) AS avg_1 FROM onek; |
| 133 | +-- SELECT newsum(four) AS sum_1500 FROM onek; |
| 134 | +-- SELECT newcnt(four) AS cnt_1000 FROM onek; |
| 135 | +-- SELECT newcnt(*) AS cnt_1000 FROM onek; |
| 136 | +-- SELECT oldcnt(*) AS cnt_1000 FROM onek; |
| 137 | +-- SELECT sum2(q1,q2) FROM int8_tbl; |
| 138 | + |
| 139 | +-- test for outer-level aggregates |
| 140 | + |
| 141 | +-- this should work |
| 142 | +select ten, udf(sum(distinct four)) from onek a |
| 143 | +group by ten |
| 144 | +having exists (select 1 from onek b where udf(sum(distinct a.four)) = b.four); |
| 145 | + |
| 146 | +-- this should fail because subquery has an agg of its own in WHERE |
| 147 | +select ten, sum(distinct four) from onek a |
| 148 | +group by ten |
| 149 | +having exists (select 1 from onek b |
| 150 | + where sum(distinct a.four + b.four) = udf(b.four)); |
| 151 | + |
| 152 | +-- [SPARK-27769] Test handling of sublinks within outer-level aggregates. |
| 153 | +-- Per bug report from Daniel Grace. |
| 154 | +select |
| 155 | + (select udf(max((select i.unique2 from tenk1 i where i.unique1 = o.unique1)))) |
| 156 | +from tenk1 o; |
0 commit comments