Skip to content

Commit 9e9fa2f

Browse files
wangyum authored and
dongjoon-hyun committed
[SPARK-25098][SQL] Trim the string when cast stringToTimestamp and stringToDate
## What changes were proposed in this pull request? **Hive** and **Oracle** trim the string when casting with `stringToTimestamp` and `stringToDate`. This PR adds support for the same behavior: ![image](https://user-images.githubusercontent.com/5399861/47979721-793b1e80-e0ff-11e8-97c8-24b10950ee9e.png) ![image](https://user-images.githubusercontent.com/5399861/47979725-7dffd280-e0ff-11e8-87d4-5767a00ed46e.png) ## How was this patch tested? Unit tests. Closes apache#22089 Closes apache#22943 from wangyum/SPARK-25098. Authored-by: Yuming Wang <[email protected]> Signed-off-by: Dongjoon Hyun <[email protected]>
1 parent 76813cf commit 9e9fa2f

File tree

2 files changed

+12
-17
lines changed

2 files changed

+12
-17
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -274,7 +274,7 @@ object DateTimeUtils {
274274
}
275275

276276
/**
277-
* Parses a given UTF8 date string to the corresponding a corresponding [[Long]] value.
277+
* Trim and parse a given UTF8 date string to a corresponding [[Long]] value.
278278
* The return type is [[Option]] in order to distinguish between 0L and null. The following
279279
* formats are allowed:
280280
*
@@ -311,7 +311,7 @@ object DateTimeUtils {
311311
val segments: Array[Int] = Array[Int](1, 1, 1, 0, 0, 0, 0, 0, 0)
312312
var i = 0
313313
var currentSegmentValue = 0
314-
val bytes = s.getBytes
314+
val bytes = s.trim.getBytes
315315
var j = 0
316316
var digitsMilli = 0
317317
var justTime = false
@@ -441,7 +441,7 @@ object DateTimeUtils {
441441
}
442442

443443
/**
444-
* Parses a given UTF8 date string to a corresponding [[Int]] value.
444+
* Trim and parse a given UTF8 date string to a corresponding [[Int]] value.
445445
* The return type is [[Option]] in order to distinguish between 0 and null. The following
446446
* formats are allowed:
447447
*
@@ -459,7 +459,7 @@ object DateTimeUtils {
459459
val segments: Array[Int] = Array[Int](1, 1, 1)
460460
var i = 0
461461
var currentSegmentValue = 0
462-
val bytes = s.getBytes
462+
val bytes = s.trim.getBytes
463463
var j = 0
464464
while (j < bytes.length && (i < 3 && !(bytes(j) == ' ' || bytes(j) == 'T'))) {
465465
val b = bytes(j)

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala

Lines changed: 8 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -140,16 +140,10 @@ class DateTimeUtilsSuite extends SparkFunSuite {
140140
c = Calendar.getInstance()
141141
c.set(2015, 2, 18, 0, 0, 0)
142142
c.set(Calendar.MILLISECOND, 0)
143-
assert(stringToDate(UTF8String.fromString("2015-03-18")).get ===
144-
millisToDays(c.getTimeInMillis))
145-
assert(stringToDate(UTF8String.fromString("2015-03-18 ")).get ===
146-
millisToDays(c.getTimeInMillis))
147-
assert(stringToDate(UTF8String.fromString("2015-03-18 123142")).get ===
148-
millisToDays(c.getTimeInMillis))
149-
assert(stringToDate(UTF8String.fromString("2015-03-18T123123")).get ===
150-
millisToDays(c.getTimeInMillis))
151-
assert(stringToDate(UTF8String.fromString("2015-03-18T")).get ===
152-
millisToDays(c.getTimeInMillis))
143+
Seq("2015-03-18", "2015-03-18 ", " 2015-03-18", " 2015-03-18 ", "2015-03-18 123142",
144+
"2015-03-18T123123", "2015-03-18T").foreach { s =>
145+
assert(stringToDate(UTF8String.fromString(s)).get === millisToDays(c.getTimeInMillis))
146+
}
153147

154148
assert(stringToDate(UTF8String.fromString("2015-03-18X")).isEmpty)
155149
assert(stringToDate(UTF8String.fromString("2015/03/18")).isEmpty)
@@ -214,9 +208,10 @@ class DateTimeUtilsSuite extends SparkFunSuite {
214208
c = Calendar.getInstance(tz)
215209
c.set(2015, 2, 18, 0, 0, 0)
216210
c.set(Calendar.MILLISECOND, 0)
217-
checkStringToTimestamp("2015-03-18", Option(c.getTimeInMillis * 1000))
218-
checkStringToTimestamp("2015-03-18 ", Option(c.getTimeInMillis * 1000))
219-
checkStringToTimestamp("2015-03-18T", Option(c.getTimeInMillis * 1000))
211+
212+
Seq("2015-03-18", "2015-03-18 ", " 2015-03-18", " 2015-03-18 ", "2015-03-18T").foreach { s =>
213+
checkStringToTimestamp(s, Option(c.getTimeInMillis * 1000))
214+
}
220215

221216
c = Calendar.getInstance(tz)
222217
c.set(2015, 2, 18, 12, 3, 17)

0 commit comments

Comments
 (0)