Skip to content

Commit 54fcaaf

Browse files
MaxGekk authored and gatorsmile committed
[SPARK-24571][SQL] Support Char literals
## What changes were proposed in this pull request?

In the PR, I propose to automatically convert a `Literal` with `Char` type to a `Literal` of `String` type. Currently, the following code:

```scala
val df = Seq("Amsterdam", "San Francisco", "London").toDF("city")
df.where($"city".contains('o')).show(false)
```

fails with the exception:

```
Unsupported literal type class java.lang.Character o
java.lang.RuntimeException: Unsupported literal type class java.lang.Character o
  at org.apache.spark.sql.catalyst.expressions.Literal$.apply(literals.scala:78)
```

The PR fixes this issue by converting a `char` to a `string` of length `1`. I believe it makes sense not to differentiate between `char` and `string(1)` in _a unified, multi-language data platform_ like Spark, which supports languages like Python/R.

Author: Maxim Gekk <[email protected]>
Author: Maxim Gekk <[email protected]>

Closes apache#21578 from MaxGekk/support-char-literals.
1 parent 9de11d3 commit 54fcaaf

File tree

5 files changed

+25
-0
lines changed

5 files changed

+25
-0
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -286,6 +286,7 @@ object CatalystTypeConverters {
286286
override def toCatalystImpl(scalaValue: Any): UTF8String = scalaValue match {
287287
case str: String => UTF8String.fromString(str)
288288
case utf8: UTF8String => utf8
289+
case chr: Char => UTF8String.fromString(chr.toString)
289290
case other => throw new IllegalArgumentException(
290291
s"The value (${other.toString}) of the type (${other.getClass.getCanonicalName}) "
291292
+ s"cannot be converted to the string type")

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ object Literal {
5757
case b: Byte => Literal(b, ByteType)
5858
case s: Short => Literal(s, ShortType)
5959
case s: String => Literal(UTF8String.fromString(s), StringType)
60+
case c: Char => Literal(UTF8String.fromString(c.toString), StringType)
6061
case b: Boolean => Literal(b, BooleanType)
6162
case d: BigDecimal => Literal(Decimal(d), DecimalType.fromBigDecimal(d))
6263
case d: JavaBigDecimal =>

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/CatalystTypeConvertersSuite.scala

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import org.apache.spark.sql.Row
2222
import org.apache.spark.sql.catalyst.expressions.UnsafeArrayData
2323
import org.apache.spark.sql.catalyst.util.GenericArrayData
2424
import org.apache.spark.sql.types._
25+
import org.apache.spark.unsafe.types.UTF8String
2526

2627
class CatalystTypeConvertersSuite extends SparkFunSuite {
2728

@@ -139,4 +140,11 @@ class CatalystTypeConvertersSuite extends SparkFunSuite {
139140
assert(exception.getMessage.contains("The value (0.1) of the type "
140141
+ "(java.lang.Double) cannot be converted to the string type"))
141142
}
143+
144+
test("SPARK-24571: convert Char to String") {
145+
val chr: Char = 'X'
146+
val converter = CatalystTypeConverters.createToCatalystConverter(StringType)
147+
val expected = UTF8String.fromString("X")
148+
assert(converter(chr) === expected)
149+
}
142150
}

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,4 +219,11 @@ class LiteralExpressionSuite extends SparkFunSuite with ExpressionEvalHelper {
219219
checkUnsupportedTypeInLiteral(Map("key1" -> 1, "key2" -> 2))
220220
checkUnsupportedTypeInLiteral(("mike", 29, 1.0))
221221
}
222+
223+
test("SPARK-24571: char literals") {
224+
checkEvaluation(Literal('X'), "X")
225+
checkEvaluation(Literal.create('0'), "0")
226+
checkEvaluation(Literal('\u0000'), "\u0000")
227+
checkEvaluation(Literal.create('\n'), "\n")
228+
}
222229
}

sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1479,6 +1479,14 @@ class DatasetSuite extends QueryTest with SharedSQLContext {
14791479
assert(ds1.schema == ds2.schema)
14801480
checkDataset(ds1.select("_2._2"), ds2.select("_2._2").collect(): _*)
14811481
}
1482+
1483+
test("SPARK-24571: filtering of string values by char literal") {
1484+
val df = Seq("Amsterdam", "San Francisco", "X").toDF("city")
1485+
checkAnswer(df.where('city === 'X'), Seq(Row("X")))
1486+
checkAnswer(
1487+
df.where($"city".contains(new java.lang.Character('A'))),
1488+
Seq(Row("Amsterdam")))
1489+
}
14821490
}
14831491

14841492
case class TestDataUnion(x: Int, y: Int, z: Int)

0 commit comments

Comments
 (0)