Skip to content

Commit 3a94fb3

Browse files
huaxingaoHyukjinKwon
authored and committed
[SPARK-28281][SQL][PYTHON][TESTS] Convert and port 'having.sql' into UDF test base
## What changes were proposed in this pull request? This PR adds some tests converted from having.sql to test UDFs following the combination guide in [SPARK-27921](url) <details><summary>Diff comparing to 'having.sql'</summary> <p> ```diff diff --git a/sql/core/src/test/resources/sql-tests/results/having.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-having.sql.out index d87ee52..7cea2e5 100644 --- a/sql/core/src/test/resources/sql-tests/results/having.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-having.sql.out -16,34 +16,34 struct<> -- !query 1 -SELECT k, sum(v) FROM hav GROUP BY k HAVING sum(v) > 2 +SELECT udf(k) AS k, udf(sum(v)) FROM hav GROUP BY k HAVING udf(sum(v)) > 2 -- !query 1 schema -struct<k:string,sum(v):bigint> +struct<k:string,udf(sum(cast(v as bigint))):string> -- !query 1 output one 6 three 3 -- !query 2 -SELECT count(k) FROM hav GROUP BY v + 1 HAVING v + 1 = 2 +SELECT udf(count(udf(k))) FROM hav GROUP BY v + 1 HAVING v + 1 = udf(2) -- !query 2 schema -struct<count(k):bigint> +struct<udf(count(udf(k))):string> -- !query 2 output 1 -- !query 3 -SELECT MIN(t.v) FROM (SELECT * FROM hav WHERE v > 0) t HAVING(COUNT(1) > 0) +SELECT udf(MIN(t.v)) FROM (SELECT * FROM hav WHERE v > 0) t HAVING(udf(COUNT(udf(1))) > 0) -- !query 3 schema -struct<min(v):int> +struct<udf(min(v)):string> -- !query 3 output 1 -- !query 4 -SELECT a + b FROM VALUES (1L, 2), (3L, 4) AS T(a, b) GROUP BY a + b HAVING a + b > 1 +SELECT udf(a + b) FROM VALUES (1L, 2), (3L, 4) AS T(a, b) GROUP BY a + b HAVING a + b > udf(1) -- !query 4 schema -struct<(a + CAST(b AS BIGINT)):bigint> +struct<udf((a + cast(b as bigint))):string> -- !query 4 output 3 7 ``` </p> </details> ## How was this patch tested? Tested as guided in SPARK-27921. Closes apache#25093 from huaxingao/spark-28281. Authored-by: Huaxin Gao <[email protected]> Signed-off-by: HyukjinKwon <[email protected]>
1 parent 8d686f3 commit 3a94fb3

File tree

2 files changed

+71
-0
lines changed

2 files changed

+71
-0
lines changed
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
-- This test file was converted from having.sql.
2+
-- Note that the currently registered UDF returns a string, so there are some differences, for instance
3+
-- in string cast within UDF in Scala and Python.
4+
5+
-- Fixture for every query in this file: temporary view hav(k, v) built from four
-- literal rows. The key "one" appears twice (v = 1 and v = 5), so grouping by k
-- merges those two rows into a single group.
create temporary view hav as select * from values
6+
("one", 1),
7+
("two", 2),
8+
("three", 3),
9+
("one", 5)
10+
as hav(k, v);
11+
12+
-- having clause: group by k and keep only the groups whose UDF-wrapped sum(v)
-- is greater than 2. The registered UDF returns a string, so the HAVING
-- predicate compares that string result with the integer literal 2.
13+
SELECT udf(k) AS k, udf(sum(v)) FROM hav GROUP BY k HAVING udf(sum(v)) > 2;
14+
15+
-- having condition references the grouping expression (v + 1) itself rather than
-- an aggregate; the right-hand side of the comparison is a UDF-wrapped literal.
16+
SELECT udf(count(udf(k))) FROM hav GROUP BY v + 1 HAVING v + 1 = udf(2);
17+
18+
-- SPARK-11032: resolve having correctly.
-- HAVING with an aggregate predicate but no GROUP BY clause, applied over an
-- aliased subquery (t); both the selected aggregate and the HAVING aggregate
-- are wrapped in the UDF.
19+
SELECT udf(MIN(t.v)) FROM (SELECT * FROM hav WHERE v > 0) t HAVING(udf(COUNT(udf(1))) > 0);
20+
21+
-- SPARK-20329: HAVING reuses the GROUP BY expression (a + b), which involves an
-- implicit type cast: a is a bigint literal column (1L, 3L) and b is an int column,
-- so b is cast to bigint. No timezone or timestamp handling is involved here.
-- NOTE(review): ticket summary is paraphrased from the query shape; confirm against JIRA.
22+
SELECT udf(a + b) FROM VALUES (1L, 2), (3L, 4) AS T(a, b) GROUP BY a + b HAVING a + b > udf(1);
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
-- Automatically generated by SQLQueryTestSuite
2+
-- Number of queries: 5
3+
4+
5+
-- !query 0
6+
create temporary view hav as select * from values
7+
("one", 1),
8+
("two", 2),
9+
("three", 3),
10+
("one", 5)
11+
as hav(k, v)
12+
-- !query 0 schema
13+
struct<>
14+
-- !query 0 output
15+
16+
17+
18+
-- !query 1
19+
SELECT udf(k) AS k, udf(sum(v)) FROM hav GROUP BY k HAVING udf(sum(v)) > 2
20+
-- !query 1 schema
21+
struct<k:string,udf(sum(cast(v as bigint))):string>
22+
-- !query 1 output
23+
one 6
24+
three 3
25+
26+
27+
-- !query 2
28+
SELECT udf(count(udf(k))) FROM hav GROUP BY v + 1 HAVING v + 1 = udf(2)
29+
-- !query 2 schema
30+
struct<udf(count(udf(k))):string>
31+
-- !query 2 output
32+
1
33+
34+
35+
-- !query 3
36+
SELECT udf(MIN(t.v)) FROM (SELECT * FROM hav WHERE v > 0) t HAVING(udf(COUNT(udf(1))) > 0)
37+
-- !query 3 schema
38+
struct<udf(min(v)):string>
39+
-- !query 3 output
40+
1
41+
42+
43+
-- !query 4
44+
SELECT udf(a + b) FROM VALUES (1L, 2), (3L, 4) AS T(a, b) GROUP BY a + b HAVING a + b > udf(1)
45+
-- !query 4 schema
46+
struct<udf((a + cast(b as bigint))):string>
47+
-- !query 4 output
48+
3
49+
7

0 commit comments

Comments
 (0)