Skip to content

Commit cd676e9

Browse files
huaxingao authored and HyukjinKwon committed
[SPARK-28277][SQL][PYTHON][TESTS] Convert and port 'except.sql' into UDF test base
## What changes were proposed in this pull request? This PR adds some tests converted from ```except.sql``` to test UDFs. Please see contribution guide of this umbrella ticket - [SPARK-27921](https://issues.apache.org/jira/browse/SPARK-27921). <details><summary>Diff comparing to 'except.sql'</summary> <p> ```diff diff --git a/sql/core/src/test/resources/sql-tests/results/except.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-except.sql.out index c9b712d..27ca7ea 100644 --- a/sql/core/src/test/resources/sql-tests/results/except.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-except.sql.out -30,16 +30,16 struct<> -- !query 2 -SELECT * FROM t1 EXCEPT SELECT * FROM t2 +SELECT udf(k), udf(v) FROM t1 EXCEPT SELECT udf(k), udf(v) FROM t2 -- !query 2 schema -struct<k:string,v:int> +struct<CAST(udf(cast(k as string)) AS STRING):string,CAST(udf(cast(v as string)) AS INT):int> -- !query 2 output three 3 two 2 -- !query 3 -SELECT * FROM t1 EXCEPT SELECT * FROM t1 where v <> 1 and v <> 2 +SELECT * FROM t1 EXCEPT SELECT * FROM t1 where udf(v) <> 1 and v <> udf(2) -- !query 3 schema struct<k:string,v:int> -- !query 3 output -49,7 +49,7 two 2 -- !query 4 -SELECT * FROM t1 where v <> 1 and v <> 22 EXCEPT SELECT * FROM t1 where v <> 2 and v >= 3 +SELECT * FROM t1 where udf(v) <> 1 and v <> udf(22) EXCEPT SELECT * FROM t1 where udf(v) <> 2 and v >= udf(3) -- !query 4 schema struct<k:string,v:int> -- !query 4 output -59,7 +59,7 two 2 -- !query 5 SELECT t1.* FROM t1, t2 where t1.k = t2.k EXCEPT -SELECT t1.* FROM t1, t2 where t1.k = t2.k and t1.k != 'one' +SELECT t1.* FROM t1, t2 where t1.k = t2.k and t1.k != udf('one') -- !query 5 schema struct<k:string,v:int> -- !query 5 output -68,7 +68,7 one NULL -- !query 6 -SELECT * FROM t2 where v >= 1 and v <> 22 EXCEPT SELECT * FROM t1 +SELECT * FROM t2 where v >= udf(1) and udf(v) <> 22 EXCEPT SELECT * FROM t1 -- !query 6 schema struct<k:string,v:int> -- !query 6 output -77,9 +77,9 one 5 -- !query 7 -SELECT 
(SELECT min(k) FROM t2 WHERE t2.k = t1.k) min_t2 FROM t1 +SELECT (SELECT min(udf(k)) FROM t2 WHERE t2.k = t1.k) min_t2 FROM t1 MINUS -SELECT (SELECT min(k) FROM t2) abs_min_t2 FROM t1 WHERE t1.k = 'one' +SELECT (SELECT udf(min(k)) FROM t2) abs_min_t2 FROM t1 WHERE t1.k = udf('one') -- !query 7 schema struct<min_t2:string> -- !query 7 output -90,16 +90,17 two -- !query 8 SELECT t1.k FROM t1 -WHERE t1.v <= (SELECT max(t2.v) +WHERE t1.v <= (SELECT udf(max(udf(t2.v))) FROM t2 - WHERE t2.k = t1.k) + WHERE udf(t2.k) = udf(t1.k)) MINUS SELECT t1.k FROM t1 -WHERE t1.v >= (SELECT min(t2.v) +WHERE udf(t1.v) >= (SELECT min(udf(t2.v)) FROM t2 WHERE t2.k = t1.k) -- !query 8 schema -struct<k:string> +struct<> -- !query 8 output -two +java.lang.UnsupportedOperationException +Cannot evaluate expression: udf(cast(null as string)) ``` </p> </details> ## How was this patch tested? Tested as guided in [SPARK-27921.](https://issues.apache.org/jira/browse/SPARK-27921) Closes apache#25101 from huaxingao/spark-28277. Authored-by: Huaxin Gao <[email protected]> Signed-off-by: HyukjinKwon <[email protected]>
1 parent 20578e8 commit cd676e9

File tree

2 files changed

+146
-0
lines changed

2 files changed

+146
-0
lines changed
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
-- This test file was converted from except.sql.
2+
-- Tests different scenarios of except operation
3+
-- Fixture t1(k, v): four rows. The key "one" appears twice, once with a NULL v.
create temporary view t1 as select * from values
4+
("one", 1),
5+
("two", 2),
6+
("three", 3),
7+
("one", NULL)
8+
as t1(k, v);
9+
10+
-- Fixture t2(k, v): five rows, including one NULL key and one NULL value.
-- Only ("one", 1) and ("one", NULL) also occur in t1.
create temporary view t2 as select * from values
11+
("one", 1),
12+
("two", 22),
13+
("one", 5),
14+
("one", NULL),
15+
(NULL, 5)
16+
as t2(k, v);
17+
18+
19+
-- Except operation that will be replaced by left anti join
-- Every projected column on both sides is passed through udf().
-- Expected rows: (three, 3) and (two, 2).
20+
SELECT udf(k), udf(v) FROM t1 EXCEPT SELECT udf(k), udf(v) FROM t2;
21+
22+
23+
-- Except operation that will be replaced by Filter: SPARK-22181
-- Both sides read t1 and only the right side is filtered, with udf() applied
-- once to the column and once to the literal.
-- Expected rows: (one, 1), (one, NULL) and (two, 2).
24+
SELECT * FROM t1 EXCEPT SELECT * FROM t1 where udf(v) <> 1 and v <> udf(2);
25+
26+
27+
-- Except operation that will be replaced by Filter: SPARK-22181
-- Both sides read t1 and both are filtered, with udf() on columns and literals.
-- Expected rows: (two, 2).
28+
SELECT * FROM t1 where udf(v) <> 1 and v <> udf(22) EXCEPT SELECT * FROM t1 where udf(v) <> 2 and v >= udf(3);
29+
30+
31+
-- Except operation that will be replaced by Filter: SPARK-22181
-- Both sides use the same t1/t2 comma join on k. The right side additionally
-- keeps only rows where t1.k != udf('one').
-- Expected rows: (one, 1) and (one, NULL).
32+
SELECT t1.* FROM t1, t2 where t1.k = t2.k
33+
EXCEPT
34+
SELECT t1.* FROM t1, t2 where t1.k = t2.k and t1.k != udf('one');
35+
36+
37+
-- Except operation that will be replaced by left anti join
-- The left side is t2 filtered with udf() on a literal and on the column.
-- The right side is all of t1.
-- Expected rows: (NULL, 5) and (one, 5).
38+
SELECT * FROM t2 where v >= udf(1) and udf(v) <> 22 EXCEPT SELECT * FROM t1;
39+
40+
41+
-- Except operation that will be replaced by left anti join
-- The left side is a correlated scalar subquery: min(udf(k)) over the t2 rows
-- that match each t1.k. The right side is an uncorrelated scalar subquery,
-- udf(min(k)) over all of t2, evaluated for t1 rows where k = udf('one').
-- Expected rows: NULL and two.
42+
SELECT (SELECT min(udf(k)) FROM t2 WHERE t2.k = t1.k) min_t2 FROM t1
43+
MINUS
44+
SELECT (SELECT udf(min(k)) FROM t2) abs_min_t2 FROM t1 WHERE t1.k = udf('one');
45+
46+
47+
-- Except operation that will be replaced by left anti join
48+
--- [SPARK-28441] The query below is commented out because udf(max(udf(column))) throws java.lang.UnsupportedOperationException: Cannot evaluate expression: udf(null)
49+
--- SELECT t1.k
50+
--- FROM t1
51+
--- WHERE t1.v <= (SELECT udf(max(udf(t2.v)))
52+
--- FROM t2
53+
--- WHERE udf(t2.k) = udf(t1.k))
54+
--- MINUS
55+
--- SELECT t1.k
56+
--- FROM t1
57+
--- WHERE udf(t1.v) >= (SELECT min(udf(t2.v))
58+
--- FROM t2
59+
--- WHERE t2.k = t1.k);
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
-- Automatically generated by SQLQueryTestSuite
2+
-- Number of queries: 8
3+
4+
5+
-- !query 0
6+
create temporary view t1 as select * from values
7+
("one", 1),
8+
("two", 2),
9+
("three", 3),
10+
("one", NULL)
11+
as t1(k, v)
12+
-- !query 0 schema
13+
struct<>
14+
-- !query 0 output
15+
16+
17+
18+
-- !query 1
19+
create temporary view t2 as select * from values
20+
("one", 1),
21+
("two", 22),
22+
("one", 5),
23+
("one", NULL),
24+
(NULL, 5)
25+
as t2(k, v)
26+
-- !query 1 schema
27+
struct<>
28+
-- !query 1 output
29+
30+
31+
32+
-- !query 2
33+
SELECT udf(k), udf(v) FROM t1 EXCEPT SELECT udf(k), udf(v) FROM t2
34+
-- !query 2 schema
35+
struct<CAST(udf(cast(k as string)) AS STRING):string,CAST(udf(cast(v as string)) AS INT):int>
36+
-- !query 2 output
37+
three 3
38+
two 2
39+
40+
41+
-- !query 3
42+
SELECT * FROM t1 EXCEPT SELECT * FROM t1 where udf(v) <> 1 and v <> udf(2)
43+
-- !query 3 schema
44+
struct<k:string,v:int>
45+
-- !query 3 output
46+
one 1
47+
one NULL
48+
two 2
49+
50+
51+
-- !query 4
52+
SELECT * FROM t1 where udf(v) <> 1 and v <> udf(22) EXCEPT SELECT * FROM t1 where udf(v) <> 2 and v >= udf(3)
53+
-- !query 4 schema
54+
struct<k:string,v:int>
55+
-- !query 4 output
56+
two 2
57+
58+
59+
-- !query 5
60+
SELECT t1.* FROM t1, t2 where t1.k = t2.k
61+
EXCEPT
62+
SELECT t1.* FROM t1, t2 where t1.k = t2.k and t1.k != udf('one')
63+
-- !query 5 schema
64+
struct<k:string,v:int>
65+
-- !query 5 output
66+
one 1
67+
one NULL
68+
69+
70+
-- !query 6
71+
SELECT * FROM t2 where v >= udf(1) and udf(v) <> 22 EXCEPT SELECT * FROM t1
72+
-- !query 6 schema
73+
struct<k:string,v:int>
74+
-- !query 6 output
75+
NULL 5
76+
one 5
77+
78+
79+
-- !query 7
80+
SELECT (SELECT min(udf(k)) FROM t2 WHERE t2.k = t1.k) min_t2 FROM t1
81+
MINUS
82+
SELECT (SELECT udf(min(k)) FROM t2) abs_min_t2 FROM t1 WHERE t1.k = udf('one')
83+
-- !query 7 schema
84+
struct<min_t2:string>
85+
-- !query 7 output
86+
NULL
87+
two

0 commit comments

Comments
 (0)