Skip to content

Commit 4645ffb

Browse files
viirya authored and HyukjinKwon committed
[SPARK-28276][SQL][PYTHON][TEST] Convert and port 'cross-join.sql' into UDF test base
## What changes were proposed in this pull request? This PR adds some tests converted from `cross-join.sql'` to test UDFs. <details><summary>Diff comparing to 'cross-join.sql'</summary> <p> ```diff diff --git a/sql/core/src/test/resources/sql-tests/results/cross-join.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-cross-join.sql.out index 3833c42..11c1e01 100644 --- a/sql/core/src/test/resources/sql-tests/results/cross-join.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-cross-join.sql.out -43,7 +43,7 two 2 two 22 -- !query 3 -SELECT * FROM nt1 cross join nt2 where nt1.k = nt2.k +SELECT * FROM nt1 cross join nt2 where udf(nt1.k) = udf(nt2.k) -- !query 3 schema struct<k:string,v1:int,k:string,v2:int> -- !query 3 output -53,7 +53,7 two 2 two 22 -- !query 4 -SELECT * FROM nt1 cross join nt2 on (nt1.k = nt2.k) +SELECT * FROM nt1 cross join nt2 on (udf(nt1.k) = udf(nt2.k)) -- !query 4 schema struct<k:string,v1:int,k:string,v2:int> -- !query 4 output -63,7 +63,7 two 2 two 22 -- !query 5 -SELECT * FROM nt1 cross join nt2 where nt1.v1 = 1 and nt2.v2 = 22 +SELECT * FROM nt1 cross join nt2 where udf(nt1.v1) = "1" and udf(nt2.v2) = "22" -- !query 5 schema struct<k:string,v1:int,k:string,v2:int> -- !query 5 output -71,12 +71,12 one 1 two 22 -- !query 6 -SELECT a.key, b.key FROM -(SELECT k key FROM nt1 WHERE v1 < 2) a +SELECT udf(a.key), udf(b.key) FROM +(SELECT udf(k) key FROM nt1 WHERE v1 < 2) a CROSS JOIN -(SELECT k key FROM nt2 WHERE v2 = 22) b +(SELECT udf(k) key FROM nt2 WHERE v2 = 22) b -- !query 6 schema -struct<key:string,key:string> +struct<udf(key):string,udf(key):string> -- !query 6 output one two -114,23 +114,29 struct<> -- !query 11 -select * from ((A join B on (a = b)) cross join C) join D on (a = d) +select * from ((A join B on (udf(a) = udf(b))) cross join C) join D on (udf(a) = udf(d)) -- !query 11 schema -struct<a:string,va:int,b:string,vb:int,c:string,vc:int,d:string,vd:int> +struct<> -- !query 11 output -one 1 one 1 one 1 
one 1 -one 1 one 1 three 3 one 1 -one 1 one 1 two 2 one 1 -three 3 three 3 one 1 three 3 -three 3 three 3 three 3 three 3 -three 3 three 3 two 2 three 3 -two 2 two 2 one 1 two 2 -two 2 two 2 three 3 two 2 -two 2 two 2 two 2 two 2 +org.apache.spark.sql.AnalysisException +Detected implicit cartesian product for INNER join between logical plans +Filter (udf(a#x) = udf(b#x)) ++- Join Inner + :- Project [k#x AS a#x, v1#x AS va#x] + : +- LocalRelation [k#x, v1#x] + +- Project [k#x AS b#x, v1#x AS vb#x] + +- LocalRelation [k#x, v1#x] +and +Project [k#x AS d#x, v1#x AS vd#x] ++- LocalRelation [k#x, v1#x] +Join condition is missing or trivial. +Either: use the CROSS JOIN syntax to allow cartesian products between these +relations, or: enable implicit cartesian products by setting the configuration +variable spark.sql.crossJoin.enabled=true; -- !query 12 -SELECT * FROM nt1 CROSS JOIN nt2 ON (nt1.k > nt2.k) +SELECT * FROM nt1 CROSS JOIN nt2 ON (udf(nt1.k) > udf(nt2.k)) -- !query 12 schema struct<k:string,v1:int,k:string,v2:int> -- !query 12 output ``` </p> </details> ## How was this patch tested? Added test. Closes apache#25168 from viirya/SPARK-28276. Authored-by: Liang-Chi Hsieh <[email protected]> Signed-off-by: HyukjinKwon <[email protected]>
1 parent 971e832 commit 4645ffb

File tree

2 files changed

+177
-0
lines changed

2 files changed

+177
-0
lines changed
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
-- Cross join detection and error checking are done in JoinSuite since explain output is
2+
-- used in the error message and the ids are not stable. Only positive cases are checked here.
3+
-- This test file was converted from cross-join.sql.
4+
5+
-- Fixture view nt1: three rows with distinct string keys in k (one, two, three)
-- and the integer values 1, 2, 3 in v1. It is the left side of every cross join below.
create temporary view nt1 as select * from values
6+
("one", 1),
7+
("two", 2),
8+
("three", 3)
9+
as nt1(k, v1);
10+
11+
-- Fixture view nt2 with columns k and v2. The key one appears twice and the key
-- three from nt1 is absent, so key comparisons against nt1 match unevenly.
create temporary view nt2 as select * from values
12+
("one", 1),
13+
("two", 22),
14+
("one", 5)
15+
as nt2(k, v2);
16+
17+
-- Cross joins with and without predicates
-- The first query has no predicate at all. The next two apply the same udf() key
-- equality, once in WHERE and once in ON. The last one filters on udf() applied to
-- the value columns of both sides.
-- NOTE(review): the value filters compare udf() results against quoted literals,
-- presumably because udf() yields strings in this test base. Confirm against the harness.
18+
SELECT * FROM nt1 cross join nt2;
19+
SELECT * FROM nt1 cross join nt2 where udf(nt1.k) = udf(nt2.k);
20+
SELECT * FROM nt1 cross join nt2 on (udf(nt1.k) = udf(nt2.k));
21+
SELECT * FROM nt1 cross join nt2 where udf(nt1.v1) = "1" and udf(nt2.v2) = "22";
22+
23+
-- Cross join of two single-column subqueries. udf() is applied to k inside each
-- subquery and again to the resulting key columns in the outer select list.
-- The subquery filters on v1 and v2 are left as plain integer comparisons.
SELECT udf(a.key), udf(b.key) FROM
24+
(SELECT udf(k) key FROM nt1 WHERE v1 < 2) a
25+
CROSS JOIN
26+
(SELECT udf(k) key FROM nt2 WHERE v2 = 22) b;
27+
28+
-- Join reordering
-- A, B, C and D are four views over the same nt1 rows. Each one only renames the
-- two columns, so the join below can refer to them unambiguously.
29+
create temporary view A(a, va) as select * from nt1;
30+
create temporary view B(b, vb) as select * from nt1;
31+
create temporary view C(c, vc) as select * from nt1;
32+
create temporary view D(d, vd) as select * from nt1;
33+
34+
-- Allowed since cross join with C is explicit
-- The A-B join and the outer join with D each carry a udf() equality condition.
-- C is the only relation joined without a condition, and it uses CROSS JOIN syntax.
35+
select * from ((A join B on (udf(a) = udf(b))) cross join C) join D on (udf(a) = udf(d));
36+
-- Cross joins with non-equal predicates
-- The ON clause uses a greater-than comparison between udf() of the two string keys
-- instead of an equality.
37+
SELECT * FROM nt1 CROSS JOIN nt2 ON (udf(nt1.k) > udf(nt2.k));
Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
-- Automatically generated by SQLQueryTestSuite
2+
-- Number of queries: 13
3+
4+
5+
-- !query 0
6+
create temporary view nt1 as select * from values
7+
("one", 1),
8+
("two", 2),
9+
("three", 3)
10+
as nt1(k, v1)
11+
-- !query 0 schema
12+
struct<>
13+
-- !query 0 output
14+
15+
16+
17+
-- !query 1
18+
create temporary view nt2 as select * from values
19+
("one", 1),
20+
("two", 22),
21+
("one", 5)
22+
as nt2(k, v2)
23+
-- !query 1 schema
24+
struct<>
25+
-- !query 1 output
26+
27+
28+
29+
-- !query 2
30+
SELECT * FROM nt1 cross join nt2
31+
-- !query 2 schema
32+
struct<k:string,v1:int,k:string,v2:int>
33+
-- !query 2 output
34+
one 1 one 1
35+
one 1 one 5
36+
one 1 two 22
37+
three 3 one 1
38+
three 3 one 5
39+
three 3 two 22
40+
two 2 one 1
41+
two 2 one 5
42+
two 2 two 22
43+
44+
45+
-- !query 3
46+
SELECT * FROM nt1 cross join nt2 where udf(nt1.k) = udf(nt2.k)
47+
-- !query 3 schema
48+
struct<k:string,v1:int,k:string,v2:int>
49+
-- !query 3 output
50+
one 1 one 1
51+
one 1 one 5
52+
two 2 two 22
53+
54+
55+
-- !query 4
56+
SELECT * FROM nt1 cross join nt2 on (udf(nt1.k) = udf(nt2.k))
57+
-- !query 4 schema
58+
struct<k:string,v1:int,k:string,v2:int>
59+
-- !query 4 output
60+
one 1 one 1
61+
one 1 one 5
62+
two 2 two 22
63+
64+
65+
-- !query 5
66+
SELECT * FROM nt1 cross join nt2 where udf(nt1.v1) = "1" and udf(nt2.v2) = "22"
67+
-- !query 5 schema
68+
struct<k:string,v1:int,k:string,v2:int>
69+
-- !query 5 output
70+
one 1 two 22
71+
72+
73+
-- !query 6
74+
SELECT udf(a.key), udf(b.key) FROM
75+
(SELECT udf(k) key FROM nt1 WHERE v1 < 2) a
76+
CROSS JOIN
77+
(SELECT udf(k) key FROM nt2 WHERE v2 = 22) b
78+
-- !query 6 schema
79+
struct<CAST(udf(cast(key as string)) AS STRING):string,CAST(udf(cast(key as string)) AS STRING):string>
80+
-- !query 6 output
81+
one two
82+
83+
84+
-- !query 7
85+
create temporary view A(a, va) as select * from nt1
86+
-- !query 7 schema
87+
struct<>
88+
-- !query 7 output
89+
90+
91+
92+
-- !query 8
93+
create temporary view B(b, vb) as select * from nt1
94+
-- !query 8 schema
95+
struct<>
96+
-- !query 8 output
97+
98+
99+
100+
-- !query 9
101+
create temporary view C(c, vc) as select * from nt1
102+
-- !query 9 schema
103+
struct<>
104+
-- !query 9 output
105+
106+
107+
108+
-- !query 10
109+
create temporary view D(d, vd) as select * from nt1
110+
-- !query 10 schema
111+
struct<>
112+
-- !query 10 output
113+
114+
115+
116+
-- !query 11
117+
select * from ((A join B on (udf(a) = udf(b))) cross join C) join D on (udf(a) = udf(d))
118+
-- !query 11 schema
119+
struct<a:string,va:int,b:string,vb:int,c:string,vc:int,d:string,vd:int>
120+
-- !query 11 output
121+
one 1 one 1 one 1 one 1
122+
one 1 one 1 three 3 one 1
123+
one 1 one 1 two 2 one 1
124+
three 3 three 3 one 1 three 3
125+
three 3 three 3 three 3 three 3
126+
three 3 three 3 two 2 three 3
127+
two 2 two 2 one 1 two 2
128+
two 2 two 2 three 3 two 2
129+
two 2 two 2 two 2 two 2
130+
131+
132+
-- !query 12
133+
SELECT * FROM nt1 CROSS JOIN nt2 ON (udf(nt1.k) > udf(nt2.k))
134+
-- !query 12 schema
135+
struct<k:string,v1:int,k:string,v2:int>
136+
-- !query 12 output
137+
three 3 one 1
138+
three 3 one 5
139+
two 2 one 1
140+
two 2 one 5

0 commit comments

Comments
 (0)