Skip to content

Commit 890ee47

Browse files
MaxGekk authored and dongjoon-hyun committed
[SPARK-51454][SQL] Support cast from time to string
### What changes were proposed in this pull request?

In the PR, I propose to support casting of TIME to STRING using the fraction formatter which formats times according to the pattern `HH:mm:ss.[..fff..]`. The pattern `[..fff..]` is a fraction of second up to microsecond resolution. The formatter does not output trailing zeros in the fraction. Apparently the `ToPrettyString` expression and `.show()` support such casting too. Also the PR adds the support of TIME literals created from `java.time.LocalTime` (used in tests).

### Why are the changes needed?

To output time values in a user-friendly format. Before the changes, show prints the internal representation of time values:

```scala
scala> Seq(LocalTime.parse("17:18:19")).toDS.show()
+-----------+
|      value|
+-----------+
|62299000000|
+-----------+
```

### Does this PR introduce _any_ user-facing change?

Yes. After the changes, the command above outputs time values in the new format:

```scala
scala> Seq(LocalTime.parse("17:18:19")).toDS.show()
+--------+
|   value|
+--------+
|17:18:19|
+--------+
```

### How was this patch tested?

By running new tests:

```
$ build/sbt "test:testOnly *ToPrettyStringSuite"
$ build/sbt "test:testOnly *CastWithAnsiOnSuite"
```

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes #50224 from MaxGekk/time-cast-to-string.

Authored-by: Max Gekk <max.gekk@gmail.com>
Signed-off-by: Dongjoon Hyun <dongjoon@apache.org>
1 parent ef4cd39 commit 890ee47

File tree

5 files changed

+38
-6
lines changed

5 files changed

+38
-6
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ToStringBase.scala

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ import java.time.ZoneOffset
2222
import org.apache.spark.sql.catalyst.InternalRow
2323
import org.apache.spark.sql.catalyst.expressions.codegen._
2424
import org.apache.spark.sql.catalyst.expressions.codegen.Block._
25-
import org.apache.spark.sql.catalyst.util.{ArrayData, CharVarcharCodegenUtils, DateFormatter, IntervalStringStyles, IntervalUtils, MapData, SparkStringUtils, TimestampFormatter}
25+
import org.apache.spark.sql.catalyst.util.{ArrayData, CharVarcharCodegenUtils, DateFormatter, FractionTimeFormatter, IntervalStringStyles, IntervalUtils, MapData, SparkStringUtils, TimestampFormatter}
2626
import org.apache.spark.sql.catalyst.util.IntervalStringStyles.ANSI_STYLE
2727
import org.apache.spark.sql.internal.SQLConf
2828
import org.apache.spark.sql.internal.SQLConf.BinaryOutputStyle
@@ -34,6 +34,7 @@ import org.apache.spark.util.ArrayImplicits._
3434
trait ToStringBase { self: UnaryExpression with TimeZoneAwareExpression =>
3535

3636
private lazy val dateFormatter = DateFormatter()
37+
private lazy val timeFormatter = new FractionTimeFormatter()
3738
private lazy val timestampFormatter = TimestampFormatter.getFractionFormatter(zoneId)
3839
private lazy val timestampNTZFormatter = TimestampFormatter.getFractionFormatter(ZoneOffset.UTC)
3940

@@ -73,6 +74,8 @@ trait ToStringBase { self: UnaryExpression with TimeZoneAwareExpression =>
7374
acceptAny[Long](t => UTF8String.fromString(timestampFormatter.format(t)))
7475
case TimestampNTZType =>
7576
acceptAny[Long](t => UTF8String.fromString(timestampNTZFormatter.format(t)))
77+
case _: TimeType =>
78+
acceptAny[Long](t => UTF8String.fromString(timeFormatter.format(t)))
7679
case ArrayType(et, _) =>
7780
acceptAny[ArrayData](array => {
7881
val builder = new UTF8StringBuilder
@@ -224,6 +227,11 @@ trait ToStringBase { self: UnaryExpression with TimeZoneAwareExpression =>
224227
ctx.addReferenceObj("timestampNTZFormatter", timestampNTZFormatter),
225228
timestampNTZFormatter.getClass)
226229
(c, evPrim) => code"$evPrim = UTF8String.fromString($tf.format($c));"
230+
case _: TimeType =>
231+
val tf = JavaCode.global(
232+
ctx.addReferenceObj("timeFormatter", timeFormatter),
233+
timeFormatter.getClass)
234+
(c, evPrim) => code"$evPrim = UTF8String.fromString($tf.format($c));"
227235
case CalendarIntervalType =>
228236
(c, evPrim) => code"$evPrim = UTF8String.fromString($c.toString());"
229237
case ArrayType(et, _) =>

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ import java.lang.{Short => JavaShort}
2828
import java.math.{BigDecimal => JavaBigDecimal}
2929
import java.nio.charset.StandardCharsets
3030
import java.sql.{Date, Timestamp}
31-
import java.time.{Duration, Instant, LocalDate, LocalDateTime, Period, ZoneOffset}
31+
import java.time.{Duration, Instant, LocalDate, LocalDateTime, LocalTime, Period, ZoneOffset}
3232
import java.util
3333
import java.util.Objects
3434

@@ -49,7 +49,7 @@ import org.apache.spark.sql.catalyst.trees.TreePattern
4949
import org.apache.spark.sql.catalyst.trees.TreePattern.{LITERAL, NULL_LITERAL, TRUE_OR_FALSE_LITERAL}
5050
import org.apache.spark.sql.catalyst.types._
5151
import org.apache.spark.sql.catalyst.util._
52-
import org.apache.spark.sql.catalyst.util.DateTimeUtils.instantToMicros
52+
import org.apache.spark.sql.catalyst.util.DateTimeUtils.{instantToMicros, localTimeToMicros}
5353
import org.apache.spark.sql.catalyst.util.IntervalStringStyles.ANSI_STYLE
5454
import org.apache.spark.sql.catalyst.util.IntervalUtils.{durationToMicros, periodToMonths, toDayTimeIntervalString, toYearMonthIntervalString}
5555
import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors}
@@ -89,6 +89,7 @@ object Literal {
8989
case l: LocalDateTime => Literal(DateTimeUtils.localDateTimeToMicros(l), TimestampNTZType)
9090
case ld: LocalDate => Literal(ld.toEpochDay.toInt, DateType)
9191
case d: Date => Literal(DateTimeUtils.fromJavaDate(d), DateType)
92+
case lt: LocalTime => Literal(localTimeToMicros(lt), TimeType())
9293
case d: Duration => Literal(durationToMicros(d), DayTimeIntervalType())
9394
case p: Period => Literal(periodToMonths(p), YearMonthIntervalType())
9495
case a: Array[Byte] => Literal(a, BinaryType)
@@ -521,7 +522,7 @@ case class Literal (value: Any, dataType: DataType) extends LeafExpression {
521522
}
522523
case ByteType | ShortType =>
523524
ExprCode.forNonNullValue(JavaCode.expression(s"($javaType)$value", dataType))
524-
case TimestampType | TimestampNTZType | LongType | _: DayTimeIntervalType =>
525+
case TimestampType | TimestampNTZType | LongType | _: DayTimeIntervalType | _: TimeType =>
525526
toExprCode(s"${value}L")
526527
case _ =>
527528
val constRef = ctx.addReferenceObj("literal", value, javaType)

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
package org.apache.spark.sql.catalyst.expressions
1919

2020
import java.sql.{Date, Timestamp}
21-
import java.time.{Duration, LocalDate, LocalDateTime, Period}
21+
import java.time.{Duration, LocalDate, LocalDateTime, LocalTime, Period}
2222
import java.time.temporal.ChronoUnit
2323
import java.util.{Calendar, Locale, TimeZone}
2424

@@ -82,7 +82,7 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper {
8282
}
8383

8484
atomicTypes.foreach(dt => checkNullCast(NullType, dt))
85-
atomicTypes.foreach(dt => checkNullCast(dt, StringType))
85+
(atomicTypes ++ timeTypes).foreach(dt => checkNullCast(dt, StringType))
8686
checkNullCast(StringType, BinaryType)
8787
checkNullCast(StringType, BooleanType)
8888
numericTypes.foreach(dt => checkNullCast(dt, BooleanType))
@@ -1457,4 +1457,16 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper {
14571457
}
14581458
}
14591459
}
1460+
1461+
test("cast time to string") {
1462+
Seq(
1463+
LocalTime.MIDNIGHT -> "00:00:00",
1464+
LocalTime.NOON -> "12:00:00",
1465+
LocalTime.of(23, 59, 59) -> "23:59:59",
1466+
LocalTime.of(23, 59, 59, 1000000) -> "23:59:59.001",
1467+
LocalTime.of(23, 59, 59, 999999000) -> "23:59:59.999999"
1468+
).foreach { case (time, expectedStr) =>
1469+
checkEvaluation(Cast(Literal(time), StringType), expectedStr)
1470+
}
1471+
}
14601472
}

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ToPrettyStringSuite.scala

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,4 +134,11 @@ class ToPrettyStringSuite extends SparkFunSuite with ExpressionEvalHelper {
134134
val prettyString = ToPrettyString(child)
135135
assert(prettyString.sql === child.sql)
136136
}
137+
138+
test("Time as pretty strings") {
139+
checkEvaluation(ToPrettyString(Literal(1000L, TimeType())), "00:00:00.001")
140+
checkEvaluation(ToPrettyString(Literal(1L, TimeType())), "00:00:00.000001")
141+
checkEvaluation(ToPrettyString(Literal(
142+
(23 * 3600 + 59 * 60 + 59) * 1000000L, TimeType())), "23:59:59")
143+
}
137144
}

sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeTestUtils.scala

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,10 @@ object DataTypeTestUtils {
6969
YearMonthIntervalType(YEAR),
7070
YearMonthIntervalType(MONTH))
7171

72+
val timeTypes: Seq[TimeType] = Seq(
73+
TimeType(TimeType.MIN_PRECISION),
74+
TimeType(TimeType.MAX_PRECISION))
75+
7276
val unsafeRowMutableFieldTypes: Seq[DataType] = Seq(
7377
NullType,
7478
BooleanType,

0 commit comments

Comments (0)