Skip to content

Commit 76a4239

Browse files
MaxGekk and yaooqinn
authored and committed
[SPARK-51456][SQL] Add the to_time function
### What changes were proposed in this pull request? In the PR, I propose to add new function `to_time()`. It casts a `STRING` input value to `TIME` using an optional formatting. #### Syntax ``` to_time(expr[, fmt]) ``` #### Arguments - expr: A `STRING` expression representing a time. - fmt: An optional format STRING expression. If `fmt` is supplied, it must conform with the datetime patterns, see https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html . If `fmt` is not supplied, the function is a synonym for `cast(expr AS TIME)`. #### Returns A TIME(n) where n is always 6 in the proposed implementation. #### Examples ```sql > SELECT to_time('00:12:00'); 00:12:00 > SELECT to_time('12.10.05', 'HH.mm.ss'); 12:10:05 ``` ### Why are the changes needed? 1. To improve user experience with Spark SQL, and allow to construct values of the new data type `TIME` from strings. 2. To simplify migration from other systems where `to_time` is supported. For instance: - Snowflake: https://docs.snowflake.com/en/sql-reference/functions/to_time - BigQuery: https://cloud.google.com/bigquery/docs/reference/standard-sql/time_functions#parse_time - MySQL: https://dev.mysql.com/doc/refman/8.4/en/date-and-time-functions.html#function_str-to-date 3. For consistency: Spark SQL has already `to_timestamp()` for `TIMESTAMP` and `to_date()` for `DATE`. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? By running the related test suites: ``` $ build/sbt "test:testOnly *ExpressionInfoSuite" $ build/sbt "test:testOnly *TimeExpressionsSuite" $ build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite -- -z time.sql" ``` ### Was this patch authored or co-authored using generative AI tooling? No. Closes #50287 from MaxGekk/to_time-2. Authored-by: Max Gekk <max.gekk@gmail.com> Signed-off-by: Kent Yao <yao@apache.org>
1 parent 513a080 commit 76a4239

File tree

10 files changed

+299
-2
lines changed

10 files changed

+299
-2
lines changed

common/utils/src/main/resources/error/error-conditions.json

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -405,6 +405,12 @@
405405
],
406406
"sqlState" : "22018"
407407
},
408+
"CANNOT_PARSE_TIME" : {
409+
"message" : [
410+
"The input string <input> cannot be parsed to a TIME value because it does not match to the datetime format <format>."
411+
],
412+
"sqlState" : "22010"
413+
},
408414
"CANNOT_PARSE_TIMESTAMP" : {
409415
"message" : [
410416
"<message>. Use <func> to tolerate invalid input string and return NULL instead."

sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimeFormatter.scala

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -122,10 +122,12 @@ object TimeFormatter {
122122
getFormatter(Some(format), locale, isParsing)
123123
}
124124

125-
def apply(format: String, isParsing: Boolean): TimeFormatter = {
126-
getFormatter(Some(format), defaultLocale, isParsing)
125+
def apply(format: Option[String], isParsing: Boolean): TimeFormatter = {
126+
getFormatter(format, defaultLocale, isParsing)
127127
}
128128

129+
def apply(format: String, isParsing: Boolean): TimeFormatter = apply(Some(format), isParsing)
130+
129131
def apply(format: String): TimeFormatter = {
130132
getFormatter(Some(format), defaultLocale, isParsing = false)
131133
}

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -650,6 +650,7 @@ object FunctionRegistry {
650650
expression[Second]("second"),
651651
expression[ParseToTimestamp]("to_timestamp"),
652652
expression[ParseToDate]("to_date"),
653+
expression[ToTime]("to_time"),
653654
expression[ToBinary]("to_binary"),
654655
expression[ToUnixTimestamp]("to_unix_timestamp"),
655656
expression[ToUTCTimestamp]("to_utc_timestamp"),
Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.spark.sql.catalyst.expressions
19+
20+
import java.time.DateTimeException
21+
22+
import org.apache.spark.sql.catalyst.expressions.objects.Invoke
23+
import org.apache.spark.sql.catalyst.util.TimeFormatter
24+
import org.apache.spark.sql.errors.QueryExecutionErrors
25+
import org.apache.spark.sql.internal.types.StringTypeWithCollation
26+
import org.apache.spark.sql.types.{AbstractDataType, ObjectType, TimeType}
27+
import org.apache.spark.unsafe.types.UTF8String
28+
29+
/**
30+
* Parses a column to a time based on the given format.
31+
*/
32+
// scalastyle:off line.size.limit
33+
@ExpressionDescription(
34+
usage = """
35+
_FUNC_(str[, format]) - Parses the `str` expression with the `format` expression to a time.
36+
If `format` is malformed or its application does not result in a well formed time, the function
37+
raises an error. By default, it follows casting rules to a time if the `format` is omitted.
38+
""",
39+
arguments = """
40+
Arguments:
41+
* str - A string to be parsed to time.
42+
* format - Time format pattern to follow. See <a href="https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html">Datetime Patterns</a> for valid
43+
time format patterns.
44+
""",
45+
examples = """
46+
Examples:
47+
> SELECT _FUNC_('00:12:00');
48+
00:12:00
49+
> SELECT _FUNC_('12.10.05', 'HH.mm.ss');
50+
12:10:05
51+
""",
52+
group = "datetime_funcs",
53+
since = "4.1.0")
54+
// scalastyle:on line.size.limit
55+
case class ToTime(str: Expression, format: Option[Expression])
56+
extends RuntimeReplaceable with ExpectsInputTypes {
57+
58+
def this(str: Expression, format: Expression) = this(str, Option(format))
59+
def this(str: Expression) = this(str, None)
60+
61+
private def invokeParser(
62+
fmt: Option[String] = None,
63+
arguments: Seq[Expression] = children): Expression = {
64+
Invoke(
65+
targetObject = Literal.create(ToTimeParser(fmt), ObjectType(classOf[ToTimeParser])),
66+
functionName = "parse",
67+
dataType = TimeType(),
68+
arguments = arguments,
69+
methodInputTypes = arguments.map(_.dataType))
70+
}
71+
72+
override lazy val replacement: Expression = format match {
73+
case None => invokeParser()
74+
case Some(expr) if expr.foldable =>
75+
Option(expr.eval())
76+
.map(f => invokeParser(Some(f.toString), Seq(str)))
77+
.getOrElse(Literal(null, expr.dataType))
78+
case _ => invokeParser()
79+
}
80+
81+
override def inputTypes: Seq[AbstractDataType] = {
82+
Seq(StringTypeWithCollation(supportsTrimCollation = true)) ++
83+
format.map(_ => StringTypeWithCollation(supportsTrimCollation = true))
84+
}
85+
86+
override def prettyName: String = "to_time"
87+
88+
override def children: Seq[Expression] = str +: format.toSeq
89+
90+
override protected def withNewChildrenInternal(
91+
newChildren: IndexedSeq[Expression]): Expression = {
92+
if (format.isDefined) {
93+
copy(str = newChildren.head, format = Some(newChildren.last))
94+
} else {
95+
copy(str = newChildren.head)
96+
}
97+
}
98+
}
99+
100+
case class ToTimeParser(fmt: Option[String]) {
101+
private lazy val formatter = TimeFormatter(fmt, isParsing = true)
102+
103+
def this() = this(None)
104+
105+
private def withErrorCondition(input: => UTF8String, fmt: => Option[String])
106+
(f: => Long): Long = {
107+
try f
108+
catch {
109+
case e: DateTimeException =>
110+
throw QueryExecutionErrors.timeParseError(input.toString, fmt, e)
111+
}
112+
}
113+
114+
def parse(s: UTF8String): Long = withErrorCondition(s, fmt)(formatter.parse(s.toString))
115+
116+
def parse(s: UTF8String, fmt: UTF8String): Long = {
117+
val format = fmt.toString
118+
withErrorCondition(s, Some(format)) {
119+
TimeFormatter(format, isParsing = true).parse(s.toString)
120+
}
121+
}
122+
}

sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,19 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase with ExecutionE
276276
summary = "")
277277
}
278278

279+
def timeParseError(input: String, fmt: Option[String], e: Throwable): SparkDateTimeException = {
280+
new SparkDateTimeException(
281+
errorClass = "CANNOT_PARSE_TIME",
282+
messageParameters = Map(
283+
"input" -> toSQLValue(input, StringType),
284+
"format" -> toSQLValue(
285+
fmt.getOrElse("HH:mm:ss.SSSSSS"),
286+
StringType)),
287+
context = Array.empty,
288+
summary = "",
289+
cause = Some(e))
290+
}
291+
279292
def ansiDateTimeArgumentOutOfRange(e: Exception): SparkDateTimeException = {
280293
new SparkDateTimeException(
281294
errorClass = "DATETIME_FIELD_OUT_OF_BOUNDS",
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.spark.sql.catalyst.expressions
19+
20+
import org.apache.spark.{SparkDateTimeException, SparkFunSuite}
21+
import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._
22+
import org.apache.spark.sql.types.StringType
23+
24+
class TimeExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
25+
test("ParseToTime") {
26+
checkEvaluation(new ToTime(Literal("00:00:00"), Literal.create(null)), null)
27+
checkEvaluation(new ToTime(Literal("00:00:00"), NonFoldableLiteral(null, StringType)), null)
28+
checkEvaluation(new ToTime(Literal(null, StringType), Literal("HH:mm:ss")), null)
29+
30+
checkEvaluation(new ToTime(Literal("00:00:00")), localTime())
31+
checkEvaluation(new ToTime(Literal("23-59-00.000999"), Literal("HH-mm-ss.SSSSSS")),
32+
localTime(23, 59, 0, 999))
33+
checkEvaluation(
34+
new ToTime(Literal("12.00.59.90909"), NonFoldableLiteral("HH.mm.ss.SSSSS")),
35+
localTime(12, 0, 59, 909090))
36+
checkEvaluation(
37+
new ToTime(NonFoldableLiteral(" 12:00.909 "), Literal(" HH:mm.SSS ")),
38+
localTime(12, 0, 0, 909000))
39+
checkEvaluation(
40+
new ToTime(
41+
NonFoldableLiteral("12 hours 123 millis"),
42+
NonFoldableLiteral("HH 'hours' SSS 'millis'")),
43+
localTime(12, 0, 0, 123000))
44+
45+
checkErrorInExpression[SparkDateTimeException](
46+
expression = new ToTime(Literal("100:50")),
47+
condition = "CANNOT_PARSE_TIME",
48+
parameters = Map("input" -> "'100:50'", "format" -> "'HH:mm:ss.SSSSSS'"))
49+
checkErrorInExpression[SparkDateTimeException](
50+
expression = new ToTime(Literal("100:50"), Literal("mm:HH")),
51+
condition = "CANNOT_PARSE_TIME",
52+
parameters = Map("input" -> "'100:50'", "format" -> "'mm:HH'"))
53+
}
54+
}

sql/core/src/test/resources/sql-functions/sql-expression-schema.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -345,6 +345,7 @@
345345
| org.apache.spark.sql.catalyst.expressions.ToDegrees | degrees | SELECT degrees(3.141592653589793) | struct<DEGREES(3.141592653589793):double> |
346346
| org.apache.spark.sql.catalyst.expressions.ToNumber | to_number | SELECT to_number('454', '999') | struct<to_number(454, 999):decimal(3,0)> |
347347
| org.apache.spark.sql.catalyst.expressions.ToRadians | radians | SELECT radians(180) | struct<RADIANS(180):double> |
348+
| org.apache.spark.sql.catalyst.expressions.ToTime | to_time | SELECT to_time('00:12:00') | struct<to_time(00:12:00):time(6)> |
348349
| org.apache.spark.sql.catalyst.expressions.ToUTCTimestamp | to_utc_timestamp | SELECT to_utc_timestamp('2016-08-31', 'Asia/Seoul') | struct<to_utc_timestamp(2016-08-31, Asia/Seoul):timestamp> |
349350
| org.apache.spark.sql.catalyst.expressions.ToUnixTimestamp | to_unix_timestamp | SELECT to_unix_timestamp('2016-04-08', 'yyyy-MM-dd') | struct<to_unix_timestamp(2016-04-08, yyyy-MM-dd):bigint> |
350351
| org.apache.spark.sql.catalyst.expressions.TransformKeys | transform_keys | SELECT transform_keys(map_from_arrays(array(1, 2, 3), array(1, 2, 3)), (k, v) -> k + 1) | struct<transform_keys(map_from_arrays(array(1, 2, 3), array(1, 2, 3)), lambdafunction((namedlambdavariable() + 1), namedlambdavariable(), namedlambdavariable())):map<int,int>> |
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,46 @@
11
-- Automatically generated by SQLQueryTestSuite
2+
-- !query
3+
create temporary view time_view as select '11:53:26.038344' time_str, 'HH:mm:ss.SSSSSS' fmt_str
4+
-- !query analysis
5+
CreateViewCommand `time_view`, select '11:53:26.038344' time_str, 'HH:mm:ss.SSSSSS' fmt_str, false, false, LocalTempView, UNSUPPORTED, true
6+
+- Project [11:53:26.038344 AS time_str#x, HH:mm:ss.SSSSSS AS fmt_str#x]
7+
+- OneRowRelation
8+
9+
210
-- !query
311
select time '16:39:45\t'
412
-- !query analysis
513
Project [59985000000 AS 59985000000#x]
614
+- OneRowRelation
15+
16+
17+
-- !query
18+
select to_time(null), to_time('01:02:03'), to_time('23-59-59.999999', 'HH-mm-ss.SSSSSS')
19+
-- !query analysis
20+
Project [to_time(null, None) AS to_time(NULL)#x, to_time(01:02:03, None) AS to_time(01:02:03)#x, to_time(23-59-59.999999, Some(HH-mm-ss.SSSSSS)) AS to_time(23-59-59.999999, HH-mm-ss.SSSSSS)#x]
21+
+- OneRowRelation
22+
23+
24+
-- !query
25+
select to_time(time_str, fmt_str) from time_view
26+
-- !query analysis
27+
Project [to_time(time_str#x, Some(fmt_str#x)) AS to_time(time_str, fmt_str)#x]
28+
+- SubqueryAlias time_view
29+
+- View (`time_view`, [time_str#x, fmt_str#x])
30+
+- Project [cast(time_str#x as string) AS time_str#x, cast(fmt_str#x as string) AS fmt_str#x]
31+
+- Project [11:53:26.038344 AS time_str#x, HH:mm:ss.SSSSSS AS fmt_str#x]
32+
+- OneRowRelation
33+
34+
35+
-- !query
36+
select to_time("11", "HH")
37+
-- !query analysis
38+
Project [to_time(11, Some(HH)) AS to_time(11, HH)#x]
39+
+- OneRowRelation
40+
41+
42+
-- !query
43+
select to_time("13-60", "HH-mm")
44+
-- !query analysis
45+
Project [to_time(13-60, Some(HH-mm)) AS to_time(13-60, HH-mm)#x]
46+
+- OneRowRelation
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,13 @@
11
-- time literals, functions and operations
22

3+
create temporary view time_view as select '11:53:26.038344' time_str, 'HH:mm:ss.SSSSSS' fmt_str;
4+
35
select time '16:39:45\t';
6+
7+
select to_time(null), to_time('01:02:03'), to_time('23-59-59.999999', 'HH-mm-ss.SSSSSS');
8+
select to_time(time_str, fmt_str) from time_view;
9+
10+
-- missing fields in `to_time`
11+
select to_time("11", "HH");
12+
-- invalid: there is no 13 hours
13+
select to_time("13-60", "HH-mm");
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,55 @@
11
-- Automatically generated by SQLQueryTestSuite
2+
-- !query
3+
create temporary view time_view as select '11:53:26.038344' time_str, 'HH:mm:ss.SSSSSS' fmt_str
4+
-- !query schema
5+
struct<>
6+
-- !query output
7+
8+
9+
210
-- !query
311
select time '16:39:45\t'
412
-- !query schema
513
struct<59985000000:time(6)>
614
-- !query output
715
16:39:45
16+
17+
18+
-- !query
19+
select to_time(null), to_time('01:02:03'), to_time('23-59-59.999999', 'HH-mm-ss.SSSSSS')
20+
-- !query schema
21+
struct<to_time(NULL):time(6),to_time(01:02:03):time(6),to_time(23-59-59.999999, HH-mm-ss.SSSSSS):time(6)>
22+
-- !query output
23+
NULL 01:02:03 23:59:59.999999
24+
25+
26+
-- !query
27+
select to_time(time_str, fmt_str) from time_view
28+
-- !query schema
29+
struct<to_time(time_str, fmt_str):time(6)>
30+
-- !query output
31+
11:53:26.038344
32+
33+
34+
-- !query
35+
select to_time("11", "HH")
36+
-- !query schema
37+
struct<to_time(11, HH):time(6)>
38+
-- !query output
39+
11:00:00
40+
41+
42+
-- !query
43+
select to_time("13-60", "HH-mm")
44+
-- !query schema
45+
struct<>
46+
-- !query output
47+
org.apache.spark.SparkDateTimeException
48+
{
49+
"errorClass" : "CANNOT_PARSE_TIME",
50+
"sqlState" : "22010",
51+
"messageParameters" : {
52+
"format" : "'HH-mm'",
53+
"input" : "'13-60'"
54+
}
55+
}

0 commit comments

Comments (0)