Commit a529be2

MaxGekk authored and HyukjinKwon committed
[SPARK-27212][SQL] Eliminate TimeZone to ZoneId conversion in stringToTimestamp
## What changes were proposed in this pull request?

In this PR, I propose to avoid the `TimeZone` to `ZoneId` conversion in `DateTimeUtils.stringToTimestamp` by changing the method's signature to require a parameter of `ZoneId` type. This avoids an unnecessary conversion (`TimeZone` -> `String` -> `ZoneId`) per row. The PR also avoids creating `ZoneId` instances from `ZoneOffset`, because `ZoneOffset` is a subclass of `ZoneId`, so that conversion is likewise unnecessary.

## How was this patch tested?

By the existing `DateTimeUtilsSuite` and `CastSuite`.

Closes apache#24155 from MaxGekk/stringtotimestamp-zoneid.

Authored-by: Maxim Gekk <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
1 parent 9f58d3b commit a529be2
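
To make the motivation concrete, here is a minimal standalone sketch; the `parseMicros` helper and the inputs are hypothetical stand-ins for the per-row cast path, not Spark APIs. The old signature forced a `TimeZone` -> `ZoneId` resolution on every row, while the new one lets the caller resolve the `ZoneId` once:

```scala
import java.time.{LocalDateTime, ZoneId}
import java.util.TimeZone

// Hypothetical stand-in for the per-row parsing work; not a Spark API.
def parseMicros(s: String, zoneId: ZoneId): Long =
  LocalDateTime.parse(s).atZone(zoneId).toInstant.toEpochMilli * 1000L

val inputs = Seq("2019-03-20T10:15:30", "2019-03-21T11:16:31")
val tz = TimeZone.getTimeZone("America/Los_Angeles")

// Before: every row repeats the TimeZone -> String -> ZoneId lookup
// (TimeZone.toZoneId resolves the zone via its string ID).
val before = inputs.map(s => parseMicros(s, tz.toZoneId))

// After: resolve the ZoneId once and reuse it for all rows.
val zoneId = tz.toZoneId
val after = inputs.map(s => parseMicros(s, zoneId))

assert(before == after)
```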

File tree: 7 files changed (+47, -40 lines)

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala

Lines changed: 3 additions & 3 deletions

```diff
@@ -364,7 +364,7 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String
   // TimestampConverter
   private[this] def castToTimestamp(from: DataType): Any => Any = from match {
     case StringType =>
-      buildCast[UTF8String](_, utfs => DateTimeUtils.stringToTimestamp(utfs, timeZone).orNull)
+      buildCast[UTF8String](_, utfs => DateTimeUtils.stringToTimestamp(utfs, zoneId).orNull)
     case BooleanType =>
       buildCast[Boolean](_, b => if (b) 1L else 0)
     case LongType =>
@@ -1017,12 +1017,12 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String
       from: DataType,
       ctx: CodegenContext): CastFunction = from match {
     case StringType =>
-      val tz = JavaCode.global(ctx.addReferenceObj("timeZone", timeZone), timeZone.getClass)
+      val zid = ctx.addReferenceObj("zoneId", zoneId, "java.time.ZoneId")
       val longOpt = ctx.freshVariable("longOpt", classOf[Option[Long]])
       (c, evPrim, evNull) =>
         code"""
          scala.Option<Long> $longOpt =
-            org.apache.spark.sql.catalyst.util.DateTimeUtils.stringToTimestamp($c, $tz);
+            org.apache.spark.sql.catalyst.util.DateTimeUtils.stringToTimestamp($c, $zid);
          if ($longOpt.isDefined()) {
            $evPrim = ((Long) $longOpt.get()).longValue();
          } else {
```
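
A note on the codegen change: the new call passes the class name `"java.time.ZoneId"` explicitly instead of deriving it from the object, presumably because the runtime class of a region-based `ZoneId` (`java.time.ZoneRegion`) is package-private and could not be named in the generated Java. A small sketch of the distinction:

```scala
import java.time.{ZoneId, ZoneOffset}

val region: ZoneId = ZoneId.of("America/Los_Angeles")
val offset: ZoneId = ZoneOffset.ofHours(-8)

// The runtime classes differ, and ZoneRegion is package-private in java.time,
// so generated code must declare references as the public supertype ZoneId.
println(region.getClass.getName) // java.time.ZoneRegion
println(offset.getClass.getName) // java.time.ZoneOffset
```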

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala

Lines changed: 3 additions & 3 deletions

```diff
@@ -36,7 +36,7 @@ import org.apache.spark.sql.catalyst.expressions.aggregate.{First, Last}
 import org.apache.spark.sql.catalyst.parser.SqlBaseParser._
 import org.apache.spark.sql.catalyst.plans._
 import org.apache.spark.sql.catalyst.plans.logical._
-import org.apache.spark.sql.catalyst.util.DateTimeUtils.{getTimeZone, stringToDate, stringToTimestamp}
+import org.apache.spark.sql.catalyst.util.DateTimeUtils.{getZoneId, stringToDate, stringToTimestamp}
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types._
 import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String}
@@ -1593,8 +1593,8 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
     valueType match {
       case "DATE" => toLiteral(stringToDate, DateType)
       case "TIMESTAMP" =>
-        val timeZone = getTimeZone(SQLConf.get.sessionLocalTimeZone)
-        toLiteral(stringToTimestamp(_, timeZone), TimestampType)
+        val zoneId = getZoneId(SQLConf.get.sessionLocalTimeZone)
+        toLiteral(stringToTimestamp(_, zoneId), TimestampType)
       case "X" =>
         val padding = if (value.length % 2 != 0) "0" else ""
         Literal(DatatypeConverter.parseHexBinary(padding + value))
```
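
For reference, `getZoneId` resolves a zone string to a `ZoneId`. A plausible sketch of the helper, assuming it delegates to `ZoneId.of` with the legacy short-ID aliases; the actual `DateTimeUtils` implementation may differ:

```scala
import java.time.ZoneId

// Assumed behavior of DateTimeUtils.getZoneId; shown for illustration only.
def getZoneId(timeZoneId: String): ZoneId = ZoneId.of(timeZoneId, ZoneId.SHORT_IDS)

println(getZoneId("PST"))              // America/Los_Angeles (via SHORT_IDS)
println(getZoneId("America/New_York")) // America/New_York
```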

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala

Lines changed: 4 additions & 4 deletions

```diff
@@ -211,7 +211,7 @@ object DateTimeUtils {
   * `T[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]-[h]h:[m]m`
   * `T[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]+[h]h:[m]m`
   */
-  def stringToTimestamp(s: UTF8String, timeZone: TimeZone): Option[SQLTimestamp] = {
+  def stringToTimestamp(s: UTF8String, timeZoneId: ZoneId): Option[SQLTimestamp] = {
     if (s == null) {
       return None
     }
@@ -320,10 +320,10 @@ object DateTimeUtils {
     }
     try {
       val zoneId = if (tz.isEmpty) {
-        timeZone.toZoneId
+        timeZoneId
       } else {
         val sign = if (tz.get.toChar == '-') -1 else 1
-        ZoneId.ofOffset("GMT", ZoneOffset.ofHoursMinutes(sign * segments(7), sign * segments(8)))
+        ZoneOffset.ofHoursMinutes(sign * segments(7), sign * segments(8))
       }
       val nanoseconds = MICROSECONDS.toNanos(segments(6))
       val localTime = LocalTime.of(segments(3), segments(4), segments(5), nanoseconds.toInt)
@@ -411,7 +411,7 @@ object DateTimeUtils {
     segments(i) = currentSegmentValue
     try {
       val localDate = LocalDate.of(segments(0), segments(1), segments(2))
-      val instant = localDate.atStartOfDay(TimeZoneUTC.toZoneId).toInstant
+      val instant = localDate.atStartOfDay(ZoneOffset.UTC).toInstant
       Some(instantToDays(instant))
     } catch {
       case NonFatal(_) => None
```
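
Dropping the `ZoneId.ofOffset("GMT", ...)` wrapper works because `ZoneOffset` extends `ZoneId`, so a parsed offset can be used directly wherever a `ZoneId` is expected. A minimal illustration (the example timestamp is arbitrary):

```scala
import java.time.{LocalDate, LocalTime, ZoneId, ZoneOffset}

// ZoneOffset is a ZoneId subclass: no ZoneId.ofOffset wrapping required.
val offset: ZoneId = ZoneOffset.ofHoursMinutes(-7, -30)

// It plugs into the same code path a region-based ZoneId uses.
val instant = LocalDate.of(2019, 3, 20)
  .atTime(LocalTime.of(10, 15, 30))
  .atZone(offset)
  .toInstant
println(instant) // 2019-03-20T17:45:30Z
```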

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HashExpressionsSuite.scala

Lines changed: 4 additions & 4 deletions

```diff
@@ -18,7 +18,7 @@
 package org.apache.spark.sql.catalyst.expressions

 import java.nio.charset.StandardCharsets
-import java.util.TimeZone
+import java.time.{ZoneId, ZoneOffset}

 import scala.collection.mutable.ArrayBuffer

@@ -208,9 +208,9 @@ class HashExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
   def checkHiveHashForTimestampType(
       timestamp: String,
       expected: Long,
-      timeZone: TimeZone = TimeZone.getTimeZone("UTC")): Unit = {
+      zoneId: ZoneId = ZoneOffset.UTC): Unit = {
     checkHiveHash(
-      DateTimeUtils.stringToTimestamp(UTF8String.fromString(timestamp), timeZone).get,
+      DateTimeUtils.stringToTimestamp(UTF8String.fromString(timestamp), zoneId).get,
       TimestampType,
       expected)
   }
@@ -223,7 +223,7 @@ class HashExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {

     // with different timezone
     checkHiveHashForTimestampType("2017-02-24 10:56:29", 1445732471,
-      TimeZone.getTimeZone("US/Pacific"))
+      DateTimeUtils.getZoneId("US/Pacific"))

     // boundary cases
     checkHiveHashForTimestampType("0001-01-01 00:00:00", 1645969984)
```

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala

Lines changed: 24 additions & 22 deletions

```diff
@@ -19,6 +19,7 @@ package org.apache.spark.sql.catalyst.util

 import java.sql.{Date, Timestamp}
 import java.text.SimpleDateFormat
+import java.time.ZoneId
 import java.util.{Locale, TimeZone}
 import java.util.concurrent.TimeUnit

@@ -31,12 +32,13 @@ class DateTimeUtilsSuite extends SparkFunSuite {

   val TimeZonePST = TimeZone.getTimeZone("PST")
   private def defaultTz = DateTimeUtils.defaultTimeZone()
+  private def defaultZoneId = ZoneId.systemDefault()

   test("nanoseconds truncation") {
     val tf = TimestampFormatter.getFractionFormatter(DateTimeUtils.defaultTimeZone.toZoneId)
     def checkStringToTimestamp(originalTime: String, expectedParsedTime: String) {
       val parsedTimestampOp = DateTimeUtils.stringToTimestamp(
-        UTF8String.fromString(originalTime), defaultTz)
+        UTF8String.fromString(originalTime), defaultZoneId)
       assert(parsedTimestampOp.isDefined, "timestamp with nanoseconds was not parsed correctly")
       assert(DateTimeUtils.timestampToString(tf, parsedTimestampOp.get) === expectedParsedTime)
     }
@@ -141,7 +143,7 @@ class DateTimeUtilsSuite extends SparkFunSuite {
   test("string to timestamp") {
     for (tz <- ALL_TIMEZONES) {
       def checkStringToTimestamp(str: String, expected: Option[Long]): Unit = {
-        assert(stringToTimestamp(UTF8String.fromString(str), tz) === expected)
+        assert(stringToTimestamp(UTF8String.fromString(str), tz.toZoneId) === expected)
       }

       checkStringToTimestamp("1969-12-31 16:00:00", Option(date(1969, 12, 31, 16, tz = tz)))
@@ -261,11 +263,11 @@ class DateTimeUtilsSuite extends SparkFunSuite {

     // Test stringToTimestamp
     assert(stringToTimestamp(
-      UTF8String.fromString("2015-02-29 00:00:00"), defaultTz).isEmpty)
+      UTF8String.fromString("2015-02-29 00:00:00"), defaultZoneId).isEmpty)
     assert(stringToTimestamp(
-      UTF8String.fromString("2015-04-31 00:00:00"), defaultTz).isEmpty)
-    assert(stringToTimestamp(UTF8String.fromString("2015-02-29"), defaultTz).isEmpty)
-    assert(stringToTimestamp(UTF8String.fromString("2015-04-31"), defaultTz).isEmpty)
+      UTF8String.fromString("2015-04-31 00:00:00"), defaultZoneId).isEmpty)
+    assert(stringToTimestamp(UTF8String.fromString("2015-02-29"), defaultZoneId).isEmpty)
+    assert(stringToTimestamp(UTF8String.fromString("2015-04-31"), defaultZoneId).isEmpty)
   }

   test("hours") {
@@ -450,20 +452,20 @@ class DateTimeUtilsSuite extends SparkFunSuite {
       val truncated =
         DateTimeUtils.truncTimestamp(inputTS, level, timezone)
       val expectedTS =
-        DateTimeUtils.stringToTimestamp(UTF8String.fromString(expected), defaultTz)
+        DateTimeUtils.stringToTimestamp(UTF8String.fromString(expected), defaultZoneId)
       assert(truncated === expectedTS.get)
     }

-    val defaultInputTS =
-      DateTimeUtils.stringToTimestamp(UTF8String.fromString("2015-03-05T09:32:05.359"), defaultTz)
-    val defaultInputTS1 =
-      DateTimeUtils.stringToTimestamp(UTF8String.fromString("2015-03-31T20:32:05.359"), defaultTz)
-    val defaultInputTS2 =
-      DateTimeUtils.stringToTimestamp(UTF8String.fromString("2015-04-01T02:32:05.359"), defaultTz)
-    val defaultInputTS3 =
-      DateTimeUtils.stringToTimestamp(UTF8String.fromString("2015-03-30T02:32:05.359"), defaultTz)
-    val defaultInputTS4 =
-      DateTimeUtils.stringToTimestamp(UTF8String.fromString("2015-03-29T02:32:05.359"), defaultTz)
+    val defaultInputTS = DateTimeUtils.stringToTimestamp(
+      UTF8String.fromString("2015-03-05T09:32:05.359"), defaultZoneId)
+    val defaultInputTS1 = DateTimeUtils.stringToTimestamp(
+      UTF8String.fromString("2015-03-31T20:32:05.359"), defaultZoneId)
+    val defaultInputTS2 = DateTimeUtils.stringToTimestamp(
+      UTF8String.fromString("2015-04-01T02:32:05.359"), defaultZoneId)
+    val defaultInputTS3 = DateTimeUtils.stringToTimestamp(
+      UTF8String.fromString("2015-03-30T02:32:05.359"), defaultZoneId)
+    val defaultInputTS4 = DateTimeUtils.stringToTimestamp(
+      UTF8String.fromString("2015-03-29T02:32:05.359"), defaultZoneId)

     testTrunc(DateTimeUtils.TRUNC_TO_YEAR, "2015-01-01T00:00:00", defaultInputTS.get)
     testTrunc(DateTimeUtils.TRUNC_TO_MONTH, "2015-03-01T00:00:00", defaultInputTS.get)
@@ -483,15 +485,15 @@ class DateTimeUtilsSuite extends SparkFunSuite {
     for (tz <- ALL_TIMEZONES) {
       withDefaultTimeZone(tz) {
         val inputTS = DateTimeUtils.stringToTimestamp(
-          UTF8String.fromString("2015-03-05T09:32:05.359"), defaultTz)
+          UTF8String.fromString("2015-03-05T09:32:05.359"), defaultZoneId)
         val inputTS1 = DateTimeUtils.stringToTimestamp(
-          UTF8String.fromString("2015-03-31T20:32:05.359"), defaultTz)
+          UTF8String.fromString("2015-03-31T20:32:05.359"), defaultZoneId)
         val inputTS2 = DateTimeUtils.stringToTimestamp(
-          UTF8String.fromString("2015-04-01T02:32:05.359"), defaultTz)
+          UTF8String.fromString("2015-04-01T02:32:05.359"), defaultZoneId)
         val inputTS3 = DateTimeUtils.stringToTimestamp(
-          UTF8String.fromString("2015-03-30T02:32:05.359"), defaultTz)
+          UTF8String.fromString("2015-03-30T02:32:05.359"), defaultZoneId)
         val inputTS4 = DateTimeUtils.stringToTimestamp(
-          UTF8String.fromString("2015-03-29T02:32:05.359"), defaultTz)
+          UTF8String.fromString("2015-03-29T02:32:05.359"), defaultZoneId)

         testTrunc(DateTimeUtils.TRUNC_TO_YEAR, "2015-01-01T00:00:00", inputTS.get, tz)
         testTrunc(DateTimeUtils.TRUNC_TO_MONTH, "2015-03-01T00:00:00", inputTS.get, tz)
```

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/UnsafeArraySuite.scala

Lines changed: 7 additions & 2 deletions

```diff
@@ -17,6 +17,8 @@

 package org.apache.spark.sql.catalyst.util

+import java.time.ZoneId
+
 import org.apache.spark.SparkFunSuite
 import org.apache.spark.sql.Row
 import org.apache.spark.sql.catalyst.encoders.{ExpressionEncoder, RowEncoder}
@@ -37,9 +39,12 @@ class UnsafeArraySuite extends SparkFunSuite {
     DateTimeUtils.stringToDate(UTF8String.fromString("1970-1-1")).get,
     DateTimeUtils.stringToDate(UTF8String.fromString("2016-7-26")).get)
   private def defaultTz = DateTimeUtils.defaultTimeZone()
+  private def defaultZoneId = ZoneId.systemDefault()
   val timestampArray = Array(
-    DateTimeUtils.stringToTimestamp(UTF8String.fromString("1970-1-1 00:00:00"), defaultTz).get,
-    DateTimeUtils.stringToTimestamp(UTF8String.fromString("2016-7-26 00:00:00"), defaultTz).get)
+    DateTimeUtils.stringToTimestamp(
+      UTF8String.fromString("1970-1-1 00:00:00"), defaultZoneId).get,
+    DateTimeUtils.stringToTimestamp(
+      UTF8String.fromString("2016-7-26 00:00:00"), defaultZoneId).get)
   val decimalArray4_1 = Array(
     BigDecimal("123.4").setScale(1, BigDecimal.RoundingMode.FLOOR),
     BigDecimal("567.8").setScale(1, BigDecimal.RoundingMode.FLOOR))
```

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRelation.scala

Lines changed: 2 additions & 2 deletions

```diff
@@ -25,7 +25,7 @@ import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.{AnalysisException, DataFrame, Row, SaveMode, SparkSession, SQLContext}
 import org.apache.spark.sql.catalyst.analysis._
 import org.apache.spark.sql.catalyst.util.{DateFormatter, DateTimeUtils, TimestampFormatter}
-import org.apache.spark.sql.catalyst.util.DateTimeUtils.{getTimeZone, stringToDate, stringToTimestamp}
+import org.apache.spark.sql.catalyst.util.DateTimeUtils.{getZoneId, stringToDate, stringToTimestamp}
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.jdbc.JdbcDialects
 import org.apache.spark.sql.sources._
@@ -187,7 +187,7 @@ private[sql] object JDBCRelation extends Logging {
     columnType match {
       case _: NumericType => value.toLong
       case DateType => parse(stringToDate).toLong
-      case TimestampType => parse(stringToTimestamp(_, getTimeZone(timeZoneId)))
+      case TimestampType => parse(stringToTimestamp(_, getZoneId(timeZoneId)))
     }
   }
```
