Skip to content

Commit 2893999

Browse files
committed
[SPARK-51447][SQL] Add stringToTime and stringToTimeAnsi
### What changes were proposed in this pull request? In the PR, I propose two new functions to SparkDateTimeUtils: 1. `stringToTime()` parses an input string to micros since midnight using `[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]`. This pattern is the same as the time pattern in `stringToTimestamp()` 2. `stringToTimeAnsi` is similar to the above function but raises the `CAST_INVALID_INPUT` error when it cannot parse the input. ### Why are the changes needed? These functions should be used in cases when the time pattern is not set during parsing: casting, time literals, datasources and so on. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? By running new tests: ``` $ build/sbt "test:testOnly *DateTimeUtilsSuite" ``` ### Was this patch authored or co-authored using generative AI tooling? No. Closes #50220 from MaxGekk/time-stringToTime. Authored-by: Max Gekk <max.gekk@gmail.com> Signed-off-by: Max Gekk <max.gekk@gmail.com>
1 parent f11bc75 commit 2893999

File tree

2 files changed

+75
-6
lines changed

2 files changed

+75
-6
lines changed

sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ import org.apache.spark.QueryContext
3030
import org.apache.spark.sql.catalyst.util.DateTimeConstants._
3131
import org.apache.spark.sql.catalyst.util.RebaseDateTime.{rebaseGregorianToJulianDays, rebaseGregorianToJulianMicros, rebaseJulianToGregorianDays, rebaseJulianToGregorianMicros}
3232
import org.apache.spark.sql.errors.ExecutionErrors
33-
import org.apache.spark.sql.types.{DateType, TimestampType}
33+
import org.apache.spark.sql.types.{DateType, TimestampType, TimeType}
3434
import org.apache.spark.unsafe.types.UTF8String
3535
import org.apache.spark.util.SparkClassUtils
3636

@@ -660,6 +660,35 @@ trait SparkDateTimeUtils {
660660
}
661661
}
662662

663+
/**
664+
* Trims and parses a given UTF8 string to a corresponding [[Long]] value which represents the
665+
* number of microseconds since midnight. The result will be independent of time zones.
666+
*
667+
* The return type is [[Option]] in order to distinguish between 0L and null. Please refer to
668+
* `parseTimestampString` for the allowed formats.
669+
*/
670+
def stringToTime(s: UTF8String): Option[Long] = {
671+
try {
672+
val (segments, zoneIdOpt, justTime) = parseTimestampString(s)
673+
// If the input string can't be parsed as a time, if it contains more than
674+
// just the time part, or if it has time zone information, return None.
675+
if (segments.isEmpty || !justTime || zoneIdOpt.isDefined) {
676+
return None
677+
}
678+
val nanoseconds = MICROSECONDS.toNanos(segments(6))
679+
val localTime = LocalTime.of(segments(3), segments(4), segments(5), nanoseconds.toInt)
680+
Some(localTimeToMicros(localTime))
681+
} catch {
682+
case NonFatal(_) => None
683+
}
684+
}
685+
686+
def stringToTimeAnsi(s: UTF8String, context: QueryContext = null): Long = {
687+
stringToTime(s).getOrElse {
688+
throw ExecutionErrors.invalidInputInCastToDatetimeError(s, TimeType(), context)
689+
}
690+
}
691+
663692
/**
664693
* Returns the index of the first non-whitespace and non-ISO control character in the byte
665694
* array.

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala

Lines changed: 45 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ import java.util.concurrent.TimeUnit
2626
import org.scalatest.matchers.must.Matchers
2727
import org.scalatest.matchers.should.Matchers._
2828

29-
import org.apache.spark.{SparkFunSuite, SparkIllegalArgumentException}
29+
import org.apache.spark.{SparkDateTimeException, SparkFunSuite, SparkIllegalArgumentException}
3030
import org.apache.spark.sql.catalyst.plans.SQLHelper
3131
import org.apache.spark.sql.catalyst.util.DateTimeConstants._
3232
import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._
@@ -763,10 +763,10 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper {
763763
}
764764

765765
test("SPARK-35664: microseconds to LocalDateTime") {
766-
assert(microsToLocalDateTime(0) == LocalDateTime.parse("1970-01-01T00:00:00"))
767-
assert(microsToLocalDateTime(100) == LocalDateTime.parse("1970-01-01T00:00:00.0001"))
768-
assert(microsToLocalDateTime(100000000) == LocalDateTime.parse("1970-01-01T00:01:40"))
769-
assert(microsToLocalDateTime(100000000000L) == LocalDateTime.parse("1970-01-02T03:46:40"))
766+
assert(microsToLocalDateTime(0) == LocalDateTime.parse("1970-01-01T00:00:00"))
767+
assert(microsToLocalDateTime(100) == LocalDateTime.parse("1970-01-01T00:00:00.0001"))
768+
assert(microsToLocalDateTime(100000000) == LocalDateTime.parse("1970-01-01T00:01:40"))
769+
assert(microsToLocalDateTime(100000000000L) == LocalDateTime.parse("1970-01-02T03:46:40"))
770770
assert(microsToLocalDateTime(253402300799999999L) ==
771771
LocalDateTime.parse("9999-12-31T23:59:59.999999"))
772772
assert(microsToLocalDateTime(Long.MinValue) ==
@@ -1126,4 +1126,44 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper {
11261126
}.getMessage
11271127
assert(msg == "long overflow")
11281128
}
1129+
1130+
test("stringToTime") {
1131+
def checkStringToTime(str: String, expected: Option[Long]): Unit = {
1132+
assert(stringToTime(UTF8String.fromString(str)) === expected)
1133+
}
1134+
1135+
checkStringToTime("00:00", Some(localTime()))
1136+
checkStringToTime("00:00:00", Some(localTime()))
1137+
checkStringToTime("00:00:00.1", Some(localTime(micros = 100000)))
1138+
checkStringToTime("00:00:59.01", Some(localTime(sec = 59, micros = 10000)))
1139+
checkStringToTime("00:59:00.001", Some(localTime(minute = 59, micros = 1000)))
1140+
checkStringToTime("23:00:00.0001", Some(localTime(hour = 23, micros = 100)))
1141+
checkStringToTime("23:59:00.00001", Some(localTime(hour = 23, minute = 59, micros = 10)))
1142+
checkStringToTime("23:59:59.000001",
1143+
Some(localTime(hour = 23, minute = 59, sec = 59, micros = 1)))
1144+
checkStringToTime("23:59:59.999999",
1145+
Some(localTime(hour = 23, minute = 59, sec = 59, micros = 999999)))
1146+
1147+
checkStringToTime("1:2:3.0", Some(localTime(hour = 1, minute = 2, sec = 3)))
1148+
checkStringToTime("T1:02:3.04", Some(localTime(hour = 1, minute = 2, sec = 3, micros = 40000)))
1149+
1150+
// Negative tests
1151+
Seq("2025-03-09 00:00:00", "00", "00:01:02 UTC").foreach { invalidTime =>
1152+
checkStringToTime(invalidTime, None)
1153+
}
1154+
}
1155+
1156+
test("stringToTimeAnsi") {
1157+
Seq("2025-03-09T00:00:00", "012", "00:01:02Z").foreach { invalidTime =>
1158+
checkError(
1159+
exception = intercept[SparkDateTimeException] {
1160+
stringToTimeAnsi(UTF8String.fromString(invalidTime))
1161+
},
1162+
condition = "CAST_INVALID_INPUT",
1163+
parameters = Map(
1164+
"expression" -> s"'$invalidTime'",
1165+
"sourceType" -> "\"STRING\"",
1166+
"targetType" -> "\"TIME(6)\""))
1167+
}
1168+
}
11291169
}

0 commit comments

Comments
 (0)