Skip to content

Commit 0ab922c

Browse files
turboFei authored and cloud-fan committed
[SPARK-29860][SQL] Fix dataType mismatch issue for InSubquery
### What changes were proposed in this pull request? There is an issue with the `InSubquery` expression. For example, there are two tables `ta` and `tb` created by the statements below. ``` sql("create table ta(id Decimal(18,0)) using parquet") sql("create table tb(id Decimal(19,0)) using parquet") ``` The statement below throws a dataType mismatch exception. ``` sql("select * from ta where id in (select id from tb)").show() ``` However, this similar statement executes successfully. ``` sql("select * from ta where id in ((select id from tb))").show() ``` The root cause is that the `InSubquery` expression, unlike the `In` expression, does not find a common type for two DecimalTypes. Besides that, the `InSubquery` expression also does not find a common type for DecimalType and double/float/bigInt. This PR fixes the issue by finding a wider type for the `InSubquery` expression when DecimalType is involved. ### Why are the changes needed? Some `InSubquery` expressions would throw a dataType mismatch exception. ### Does this PR introduce any user-facing change? No. ### How was this patch tested? Unit test. Closes apache#26485 from turboFei/SPARK-29860-in-subquery. Authored-by: turbofei <[email protected]> Signed-off-by: Wenchen Fan <[email protected]>
1 parent 0bd8b99 commit 0ab922c

File tree

4 files changed

+26
-6
lines changed

4 files changed

+26
-6
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -470,8 +470,7 @@ object TypeCoercion {
470470
val rhs = sub.output
471471

472472
val commonTypes = lhs.zip(rhs).flatMap { case (l, r) =>
473-
findCommonTypeForBinaryComparison(l.dataType, r.dataType, conf)
474-
.orElse(findTightestCommonType(l.dataType, r.dataType))
473+
findWiderTypeForTwo(l.dataType, r.dataType)
475474
}
476475

477476
// The number of columns/expressions must match between LHS and RHS of an

sql/core/src/test/resources/sql-tests/inputs/subquery/negative-cases/subq-input-typecheck.sql

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@ CREATE TEMPORARY VIEW t4 AS SELECT * FROM VALUES
1818
AS t1(t4a, t4b, t4c);
1919

2020
CREATE TEMPORARY VIEW t5 AS SELECT * FROM VALUES
21-
(CAST(1 AS DECIMAL(18, 0)), CAST(2 AS STRING), CAST(3 AS BIGINT))
21+
(CAST('2011-01-01 01:01:01' AS TIMESTAMP), CAST(2 AS STRING), CAST(3 AS BIGINT))
2222
AS t1(t5a, t5b, t5c);
2323

2424
-- TC 01.01

sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/subq-input-typecheck.sql.out

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -44,7 +44,7 @@ struct<>
4444

4545
-- !query 4
4646
CREATE TEMPORARY VIEW t5 AS SELECT * FROM VALUES
47-
(CAST(1 AS DECIMAL(18, 0)), CAST(2 AS STRING), CAST(3 AS BIGINT))
47+
(CAST('2011-01-01 01:01:01' AS TIMESTAMP), CAST(2 AS STRING), CAST(3 AS BIGINT))
4848
AS t1(t5a, t5b, t5c)
4949
-- !query 4 schema
5050
struct<>
@@ -139,8 +139,8 @@ cannot resolve '(named_struct('t4a', t4.`t4a`, 't4b', t4.`t4b`, 't4c', t4.`t4c`)
139139
The data type of one or more elements in the left hand side of an IN subquery
140140
is not compatible with the data type of the output of the subquery
141141
Mismatched columns:
142-
[(t4.`t4a`:double, t5.`t5a`:decimal(18,0)), (t4.`t4c`:string, t5.`t5c`:bigint)]
142+
[(t4.`t4a`:double, t5.`t5a`:timestamp), (t4.`t4c`:string, t5.`t5c`:bigint)]
143143
Left side:
144144
[double, string, string].
145145
Right side:
146-
[decimal(18,0), string, bigint].;
146+
[timestamp, string, bigint].;

sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala

Lines changed: 21 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3313,6 +3313,27 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession {
33133313
cubeDF.join(cubeDF, "nums"),
33143314
Row(1, 0, 0) :: Row(2, 0, 0) :: Row(3, 0, 0) :: Nil)
33153315
}
3316+
3317+
test("SPARK-29860: Fix dataType mismatch issue for InSubquery") {
3318+
withTempView("ta", "tb", "tc", "td", "te", "tf") {
3319+
sql("CREATE TEMPORARY VIEW ta AS SELECT * FROM VALUES(CAST(1 AS DECIMAL(8, 0))) AS ta(id)")
3320+
sql("CREATE TEMPORARY VIEW tb AS SELECT * FROM VALUES(CAST(1 AS DECIMAL(7, 2))) AS tb(id)")
3321+
sql("CREATE TEMPORARY VIEW tc AS SELECT * FROM VALUES(CAST(1 AS DOUBLE)) AS tc(id)")
3322+
sql("CREATE TEMPORARY VIEW td AS SELECT * FROM VALUES(CAST(1 AS FLOAT)) AS td(id)")
3323+
sql("CREATE TEMPORARY VIEW te AS SELECT * FROM VALUES(CAST(1 AS BIGINT)) AS te(id)")
3324+
sql("CREATE TEMPORARY VIEW tf AS SELECT * FROM VALUES(CAST(1 AS DECIMAL(38, 38))) AS tf(id)")
3325+
val df1 = sql("SELECT id FROM ta WHERE id IN (SELECT id FROM tb)")
3326+
checkAnswer(df1, Row(new java.math.BigDecimal(1)))
3327+
val df2 = sql("SELECT id FROM ta WHERE id IN (SELECT id FROM tc)")
3328+
checkAnswer(df2, Row(new java.math.BigDecimal(1)))
3329+
val df3 = sql("SELECT id FROM ta WHERE id IN (SELECT id FROM td)")
3330+
checkAnswer(df3, Row(new java.math.BigDecimal(1)))
3331+
val df4 = sql("SELECT id FROM ta WHERE id IN (SELECT id FROM te)")
3332+
checkAnswer(df4, Row(new java.math.BigDecimal(1)))
3333+
val df5 = sql("SELECT id FROM ta WHERE id IN (SELECT id FROM tf)")
3334+
checkAnswer(df5, Array.empty[Row])
3335+
}
3336+
}
33163337
}
33173338

33183339
case class Foo(bar: Option[String])

0 commit comments

Comments
 (0)