Skip to content

Commit b598dfd

Browse files
vinodkc authored and
HyukjinKwon committed
[SPARK-28275][SQL][PYTHON][TESTS] Convert and port 'count.sql' into UDF test base
## What changes were proposed in this pull request? This PR adds some tests converted from 'count.sql' to test UDFs <details><summary>Diff comparing to 'count.sql'</summary> <p> ```diff diff --git a/sql/core/src/test/resources/sql-tests/results/count.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-count.sql.out index b8a86d4..9476937 100644 --- a/sql/core/src/test/resources/sql-tests/results/count.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-count.sql.out -14,42 +14,42 struct<> -- !query 1 SELECT - count(*), count(1), count(null), count(a), count(b), count(a + b), count((a, b)) + udf(count(*)), udf(count(1)), udf(count(null)), udf(count(a)), udf(count(b)), udf(count(a + b)), udf(count((a, b))) FROM testData -- !query 1 schema -struct<count(1):bigint,count(1):bigint,count(NULL):bigint,count(a):bigint,count(b):bigint,count((a + b)):bigint,count(named_struct(a, a, b, b)):bigint> +struct<udf(count(1)):string,udf(count(1)):string,udf(count(null)):string,udf(count(a)):string,udf(count(b)):string,udf(count((a + b))):string,udf(count(named_struct(a, a, b, b))):string> -- !query 1 output 7 7 0 5 5 4 7 -- !query 2 SELECT - count(DISTINCT 1), - count(DISTINCT null), - count(DISTINCT a), - count(DISTINCT b), - count(DISTINCT (a + b)), - count(DISTINCT (a, b)) + udf(count(DISTINCT 1)), + udf(count(DISTINCT null)), + udf(count(DISTINCT a)), + udf(count(DISTINCT b)), + udf(count(DISTINCT (a + b))), + udf(count(DISTINCT (a, b))) FROM testData -- !query 2 schema -struct<count(DISTINCT 1):bigint,count(DISTINCT NULL):bigint,count(DISTINCT a):bigint,count(DISTINCT b):bigint,count(DISTINCT (a + b)):bigint,count(DISTINCT named_struct(a, a, b, b)):bigint> +struct<udf(count(distinct 1)):string,udf(count(distinct null)):string,udf(count(distinct a)):string,udf(count(distinct b)):string,udf(count(distinct (a + b))):string,udf(count(distinct named_struct(a, a, b, b))):string> -- !query 2 output 1 0 2 2 2 6 -- !query 3 -SELECT count(a, b), count(b, a), 
count(testData.*) FROM testData +SELECT udf(count(a, b)), udf(count(b, a)), udf(count(testData.*)) FROM testData -- !query 3 schema -struct<count(a, b):bigint,count(b, a):bigint,count(a, b):bigint> +struct<udf(count(a, b)):string,udf(count(b, a)):string,udf(count(a, b)):string> -- !query 3 output 4 4 4 -- !query 4 SELECT - count(DISTINCT a, b), count(DISTINCT b, a), count(DISTINCT *), count(DISTINCT testData.*) + udf(count(DISTINCT a, b)), udf(count(DISTINCT b, a)), udf(count(DISTINCT *)), udf(count(DISTINCT testData.*)) FROM testData -- !query 4 schema -struct<count(DISTINCT a, b):bigint,count(DISTINCT b, a):bigint,count(DISTINCT a, b):bigint,count(DISTINCT a, b):bigint> +struct<udf(count(distinct a, b)):string,udf(count(distinct b, a)):string,udf(count(distinct a, b)):string,udf(count(distinct a, b)):string> -- !query 4 output 3 3 3 3 ``` </p> </details> ## How was this patch tested? Tested as guided in SPARK-27921. Closes apache#25089 from vinodkc/br_Fix_SPARK-28275. Authored-by: Vinod KC <[email protected]> Signed-off-by: HyukjinKwon <[email protected]>
1 parent 06ac7d5 commit b598dfd

File tree

2 files changed

+83
-0
lines changed

2 files changed

+83
-0
lines changed
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
-- This test file was converted from count.sql
2+
-- Test data: a temporary view named testData with columns (a, b), built from an
-- inline VALUES list of seven rows. The rows include a repeated pair (1, 1), one
-- row where only a is null, one where only b is null, and one where both are
-- null, so the count queries below see duplicates and every null combination.
3+
CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES
4+
(1, 1), (1, 2), (2, 1), (1, 1), (null, 2), (1, null), (null, null)
5+
AS testData(a, b);
6+
7+
-- count with single expression
-- Each output column applies count to exactly one argument over all rows of
-- testData: `*`, the literals 1 and null, the columns a and b, the sum a + b,
-- and the pair (a, b). Every aggregate result is then passed through udf(...).
-- NOTE(review): udf is not defined in this file; presumably it is registered by
-- the test harness before this script runs — confirm.
8+
SELECT
9+
udf(count(*)), udf(count(1)), udf(count(null)), udf(count(a)), udf(count(b)), udf(count(a + b)), udf(count((a, b)))
10+
FROM testData;
11+
12+
-- distinct count with single expression
-- Same shape as the single-expression query, but every count carries DISTINCT:
-- the literals 1 and null, the columns a and b, the sum (a + b), and the pair
-- (a, b). Each aggregate result is wrapped in udf(...).
-- NOTE(review): udf is not defined in this file; presumably it is registered by
-- the test harness before this script runs — confirm.
12+
SELECT
13+
udf(count(DISTINCT 1)),
14+
udf(count(DISTINCT null)),
15+
udf(count(DISTINCT a)),
16+
udf(count(DISTINCT b)),
17+
udf(count(DISTINCT (a + b))),
18+
udf(count(DISTINCT (a, b)))
19+
FROM testData;
21+
22+
-- count with multiple expressions
-- count is given more than one argument: the two columns in both orders
-- (a, b) and (b, a), and the qualified star testData.*, which stands for the
-- view's columns. Each aggregate result is wrapped in udf(...).
-- NOTE(review): udf is not defined in this file; presumably it is registered by
-- the test harness before this script runs — confirm.
22+
SELECT udf(count(a, b)), udf(count(b, a)), udf(count(testData.*)) FROM testData;
24+
25+
-- distinct count with multiple expressions
-- DISTINCT variant of the multi-argument query: the column pairs (a, b) and
-- (b, a), the bare star `*`, and the qualified star testData.*.
-- Each aggregate result is wrapped in udf(...).
-- NOTE(review): udf is not defined in this file; presumably it is registered by
-- the test harness before this script runs — confirm.
25+
SELECT
26+
udf(count(DISTINCT a, b)), udf(count(DISTINCT b, a)), udf(count(DISTINCT *)), udf(count(DISTINCT testData.*))
27+
FROM testData;
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
-- Automatically generated by SQLQueryTestSuite
2+
-- Number of queries: 5
3+
4+
5+
-- !query 0
6+
CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES
7+
(1, 1), (1, 2), (2, 1), (1, 1), (null, 2), (1, null), (null, null)
8+
AS testData(a, b)
9+
-- !query 0 schema
10+
struct<>
11+
-- !query 0 output
12+
13+
14+
15+
-- !query 1
16+
SELECT
17+
udf(count(*)), udf(count(1)), udf(count(null)), udf(count(a)), udf(count(b)), udf(count(a + b)), udf(count((a, b)))
18+
FROM testData
19+
-- !query 1 schema
20+
struct<udf(count(1)):string,udf(count(1)):string,udf(count(null)):string,udf(count(a)):string,udf(count(b)):string,udf(count((a + b))):string,udf(count(named_struct(a, a, b, b))):string>
21+
-- !query 1 output
22+
7 7 0 5 5 4 7
23+
24+
25+
-- !query 2
26+
SELECT
27+
udf(count(DISTINCT 1)),
28+
udf(count(DISTINCT null)),
29+
udf(count(DISTINCT a)),
30+
udf(count(DISTINCT b)),
31+
udf(count(DISTINCT (a + b))),
32+
udf(count(DISTINCT (a, b)))
33+
FROM testData
34+
-- !query 2 schema
35+
struct<udf(count(distinct 1)):string,udf(count(distinct null)):string,udf(count(distinct a)):string,udf(count(distinct b)):string,udf(count(distinct (a + b))):string,udf(count(distinct named_struct(a, a, b, b))):string>
36+
-- !query 2 output
37+
1 0 2 2 2 6
38+
39+
40+
-- !query 3
41+
SELECT udf(count(a, b)), udf(count(b, a)), udf(count(testData.*)) FROM testData
42+
-- !query 3 schema
43+
struct<udf(count(a, b)):string,udf(count(b, a)):string,udf(count(a, b)):string>
44+
-- !query 3 output
45+
4 4 4
46+
47+
48+
-- !query 4
49+
SELECT
50+
udf(count(DISTINCT a, b)), udf(count(DISTINCT b, a)), udf(count(DISTINCT *)), udf(count(DISTINCT testData.*))
51+
FROM testData
52+
-- !query 4 schema
53+
struct<udf(count(distinct a, b)):string,udf(count(distinct b, a)):string,udf(count(distinct a, b)):string,udf(count(distinct a, b)):string>
54+
-- !query 4 output
55+
3 3 3 3

0 commit comments

Comments
 (0)