Skip to content

Commit 1122b87

Browse files
robert3005 and bulldozer-bot[bot]
authored and committed
Use SMART resolver strategy when parsing timestamps and dates (apache-spark-on-k8s#504)
This one is really hard to deal with. Since we have internally used `yyyy-MM-dd HH:mm:ss.S` as the default SimpleDateFormat, there is no way to keep STRICT parsing without accidentally causing failures. This will require rewriting data for any timestamps that were written via CSV.
1 parent 109f910 commit 1122b87

File tree

3 files changed

+6
-3
lines changed

3 files changed

+6
-3
lines changed

FORK.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
* yarn: YarnClusterSchedulerBackend, YarnSchedulerBackend
2020

2121
* [SPARK-26626](https://issues.apache.org/jira/browse/SPARK-26626) - Limited the maximum size of repeatedly substituted aliases
22+
* [SPARK-26178](https://issues.apache.org/jira/browse/SPARK-26178) (partial) - Do not use STRICT strategy when parsing timestamps
2223

2324
# Added
2425

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,6 @@ private object DateTimeFormatterHelper {
7373
.parseDefaulting(ChronoField.SECOND_OF_MINUTE, 0)
7474
.toFormatter(locale)
7575
.withChronology(IsoChronology.INSTANCE)
76-
.withResolverStyle(ResolverStyle.STRICT)
76+
.withResolverStyle(ResolverStyle.SMART)
7777
}
7878
}

sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -568,9 +568,10 @@ class DateFunctionsSuite extends QueryTest with SharedSQLContext {
568568
val y1 = "2016-02-29"
569569
val y2 = "2017-02-29"
570570
val ts5 = Timestamp.valueOf("2016-02-29 00:00:00")
571+
val ts6 = Timestamp.valueOf("2017-02-28 00:00:00")
571572
val df2 = Seq(y1, y2).toDF("y")
572573
checkAnswer(df2.select(unix_timestamp(col("y"), "yyyy-MM-dd")), Seq(
573-
Row(ts5.getTime / 1000L), Row(null)))
574+
Row(ts5.getTime / 1000L), Row(ts6.getTime / 1000L)))
574575

575576
val now = sql("select unix_timestamp()").collect().head.getLong(0)
576577
checkAnswer(sql(s"select cast ($now as timestamp)"), Row(new java.util.Date(now * 1000)))
@@ -613,9 +614,10 @@ class DateFunctionsSuite extends QueryTest with SharedSQLContext {
613614
val y1 = "2016-02-29"
614615
val y2 = "2017-02-29"
615616
val ts5 = Timestamp.valueOf("2016-02-29 00:00:00")
617+
val ts6 = Timestamp.valueOf("2017-02-28 00:00:00")
616618
val df2 = Seq(y1, y2).toDF("y")
617619
checkAnswer(df2.select(unix_timestamp(col("y"), "yyyy-MM-dd")), Seq(
618-
Row(ts5.getTime / 1000L), Row(null)))
620+
Row(ts5.getTime / 1000L), Row(ts6.getTime / 1000L)))
619621

620622
// invalid format
621623
checkAnswer(df1.selectExpr(s"to_unix_timestamp(x, 'yyyy-MM-dd bb:HH:ss')"), Seq(

0 commit comments

Comments
 (0)