
Commit fd02372

bersprockets authored and peter-toth committed
[SPARK-53804][SQL] Support TIME radix sort
### What changes were proposed in this pull request?

Add support for radix sort of the Time type. This PR is based on #35279, which added similar support for TimestampNTZ.

### Why are the changes needed?

Better performance when sorting by a single Time type column.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

New test. In addition, I added temporary debug statements to `SortExec` in a working copy with this change and in one without it, then tested a join on a Time type column. In the copy with the change, `canUseRadixSort` was true; in the copy without it, `canUseRadixSort` was false.

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes #52520 from bersprockets/time_sortprefix.

Authored-by: Bruce Robbins <[email protected]>
Signed-off-by: Peter Toth <[email protected]>
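Background note (not part of the patch): the change works because a TIME value is physically stored as a single signed long of nanoseconds since midnight, so the existing long prefix comparator already orders it correctly and the whole value fits in the radix-sortable prefix. Below is a minimal standalone sketch of that property; `TimePrefixSketch` and `timePrefix` are illustrative names, and plain `LocalTime.toNanoOfDay` stands in for Spark's `SparkDateTimeUtils.localTimeToNanos` (assumed equivalent for this purpose).

```scala
import java.time.LocalTime

// Illustrative sketch, not project code: a TIME value is a count of nanoseconds
// since midnight held in a long, so that long can serve directly as the sort
// prefix, and comparing prefixes is the same as comparing the times themselves.
object TimePrefixSketch {
  // Stand-in for SparkDateTimeUtils.localTimeToNanos (assumed equivalent here).
  def timePrefix(t: LocalTime): Long = t.toNanoOfDay

  def main(args: Array[String]): Unit = {
    val a = LocalTime.of(9, 30, 0)
    val b = LocalTime.of(21, 15, 1, 123456) // same value the new test case uses
    // Prefix order agrees with temporal order.
    assert((timePrefix(a) < timePrefix(b)) == a.isBefore(b))
    println(s"prefix(a) = ${timePrefix(a)}, prefix(b) = ${timePrefix(b)}")
  }
}
```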
1 parent 08bd390 commit fd02372

3 files changed (+12, -5 lines changed)

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala

Lines changed: 3 additions & 3 deletions
```diff
@@ -128,7 +128,7 @@ object SortOrder {
 case class SortPrefix(child: SortOrder) extends UnaryExpression {
 
   val nullValue = child.child.dataType match {
-    case BooleanType | DateType | TimestampType | TimestampNTZType |
+    case BooleanType | DateType | TimestampType | TimestampNTZType | _: TimeType |
         _: IntegralType | _: AnsiIntervalType =>
       if (nullAsSmallest) Long.MinValue else Long.MaxValue
     case dt: DecimalType if dt.precision - dt.scale <= Decimal.MAX_LONG_DIGITS =>
@@ -151,7 +151,7 @@ case class SortPrefix(child: SortOrder) extends UnaryExpression {
   private lazy val calcPrefix: Any => Long = child.child.dataType match {
     case BooleanType => (raw) =>
       if (raw.asInstanceOf[Boolean]) 1 else 0
-    case DateType | TimestampType | TimestampNTZType |
+    case DateType | TimestampType | TimestampNTZType | _: TimeType |
         _: IntegralType | _: AnsiIntervalType => (raw) =>
       raw.asInstanceOf[java.lang.Number].longValue()
     case FloatType | DoubleType => (raw) => {
@@ -202,7 +202,7 @@ case class SortPrefix(child: SortOrder) extends UnaryExpression {
       s"$input ? 1L : 0L"
     case _: IntegralType =>
       s"(long) $input"
-    case DateType | TimestampType | TimestampNTZType | _: AnsiIntervalType =>
+    case DateType | TimestampType | TimestampNTZType | _: TimeType | _: AnsiIntervalType =>
      s"(long) $input"
    case FloatType | DoubleType =>
      s"$DoublePrefixCmp.computePrefix((double)$input)"
```

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SortOrderExpressionsSuite.scala

Lines changed: 6 additions & 0 deletions
```diff
@@ -18,9 +18,11 @@
 package org.apache.spark.sql.catalyst.expressions
 
 import java.sql.Timestamp
+import java.time.LocalTime
 
 import org.apache.spark.SparkFunSuite
 import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.DataTypeMismatch
+import org.apache.spark.sql.catalyst.util.SparkDateTimeUtils
 import org.apache.spark.sql.types._
 import org.apache.spark.unsafe.types.UTF8String
 import org.apache.spark.util.collection.unsafe.sort.PrefixComparators._
@@ -51,6 +53,9 @@ class SortOrderExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper
     val dec3 = Literal(Decimal(20132983L, 21, 2))
     val list1 = Literal.create(Seq(1, 2), ArrayType(IntegerType))
     val nullVal = Literal.create(null, IntegerType)
+    val tm1LocalTime = LocalTime.of(21, 15, 1, 123456)
+    val tm1Nano = SparkDateTimeUtils.localTimeToNanos(tm1LocalTime)
+    val tm1 = Literal.create(tm1LocalTime, TimeType(6))
 
     checkEvaluation(SortPrefix(SortOrder(b1, Ascending)), 0L)
     checkEvaluation(SortPrefix(SortOrder(b2, Ascending)), 1L)
@@ -83,6 +88,7 @@ class SortOrderExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper
       DoublePrefixComparator.computePrefix(201329.83d))
     checkEvaluation(SortPrefix(SortOrder(list1, Ascending)), 0L)
     checkEvaluation(SortPrefix(SortOrder(nullVal, Ascending)), null)
+    checkEvaluation(SortPrefix(SortOrder(tm1, Ascending)), tm1Nano)
   }
 
   test("Cannot sort map type") {
```

sql/core/src/main/scala/org/apache/spark/sql/execution/SortPrefixUtils.scala

Lines changed: 3 additions & 2 deletions
```diff
@@ -43,7 +43,7 @@ object SortPrefixUtils {
       case StringType => stringPrefixComparator(sortOrder)
       case BinaryType => binaryPrefixComparator(sortOrder)
       case BooleanType | ByteType | ShortType | IntegerType | LongType | DateType | TimestampType |
-           TimestampNTZType | _: AnsiIntervalType =>
+           TimestampNTZType | _: TimeType | _: AnsiIntervalType =>
        longPrefixComparator(sortOrder)
      case dt: DecimalType if dt.precision - dt.scale <= Decimal.MAX_LONG_DIGITS =>
        longPrefixComparator(sortOrder)
@@ -123,7 +123,8 @@ object SortPrefixUtils {
   def canSortFullyWithPrefix(sortOrder: SortOrder): Boolean = {
     sortOrder.dataType match {
       case BooleanType | ByteType | ShortType | IntegerType | LongType | DateType |
-           TimestampType | TimestampNTZType | FloatType | DoubleType | _: AnsiIntervalType =>
+           TimestampType | TimestampNTZType | _: TimeType | FloatType | DoubleType |
+           _: AnsiIntervalType =>
         true
       case dt: DecimalType if dt.precision <= Decimal.MAX_LONG_DIGITS =>
         true
```
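The `canSortFullyWithPrefix` change rests on the fact that the long prefix carries all of the information in a TIME value, so sorting by prefixes alone already yields the final order and no tie-breaking record comparisons are needed. Here is a self-contained sanity check of that property outside Spark (`FullPrefixSortCheck` is an illustrative name, not project code):

```scala
import java.time.LocalTime
import scala.util.Random

// Illustrative sketch: ordering by the nanoseconds-of-day long alone gives
// exactly the same order as comparing the LocalTime values, which is the
// property that lets canSortFullyWithPrefix return true for _: TimeType.
object FullPrefixSortCheck {
  private val NanosPerDay = 24L * 60 * 60 * 1000 * 1000 * 1000

  def main(args: Array[String]): Unit = {
    val rnd = new Random(42)
    val times = Seq.fill(1000)(
      LocalTime.ofNanoOfDay(Math.floorMod(rnd.nextLong(), NanosPerDay)))

    val byPrefixOnly = times.sortBy(_.toNanoOfDay)             // prefix-only order
    val byFullValue = times.sortWith((x, y) => x.isBefore(y))  // full value order

    assert(byPrefixOnly == byFullValue)
    println("prefix-only order matches full LocalTime order")
  }
}
```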
