@@ -28,7 +28,7 @@ use arrow::array::types::{
2828 ArrowTimestampType , TimestampMicrosecondType , TimestampMillisecondType ,
2929 TimestampNanosecondType , TimestampSecondType ,
3030} ;
31- use arrow:: array:: { Array , PrimitiveArray } ;
31+ use arrow:: array:: { Array , ArrayRef , Int64Array , PrimitiveArray } ;
3232use arrow:: datatypes:: DataType :: { self , Null , Timestamp , Utf8 , Utf8View } ;
3333use arrow:: datatypes:: TimeUnit :: { self , Microsecond , Millisecond , Nanosecond , Second } ;
3434use datafusion_common:: cast:: as_primitive_array;
@@ -60,6 +60,8 @@ use chrono::{
6060 - hour / HOUR
6161 - minute / MINUTE
6262 - second / SECOND
63+ - millisecond / MILLISECOND
64+ - microsecond / MICROSECOND
6365"#
6466 ) ,
6567 argument(
@@ -185,6 +187,26 @@ impl ScalarUDFImpl for DateTruncFunc {
185187 ) -> Result < ColumnarValue > {
186188 let parsed_tz = parse_tz ( tz_opt) ?;
187189 let array = as_primitive_array :: < T > ( array) ?;
190+
191+ // fast path for fine granularities
192+ if matches ! (
193+ granularity. as_str( ) ,
194+ // For modern timezones, it's correct to truncate "minute" in this way.
195+ // Both datafusion and arrow are ignoring historical timezone's non-minute granularity
196+ // bias (e.g., Asia/Kathmandu before 1919 is UTC+05:41:16).
197+ "second" | "minute" | "millisecond" | "microsecond"
198+ ) ||
199+ // In UTC, "hour" and "day" have uniform durations and can be truncated with simple arithmetic
200+ ( parsed_tz. is_none ( ) && matches ! ( granularity. as_str( ) , "hour" | "day" ) )
201+ {
202+ let result = general_date_trunc_array_fine_granularity (
203+ T :: UNIT ,
204+ array,
205+ granularity. as_str ( ) ,
206+ ) ?;
207+ return Ok ( ColumnarValue :: Array ( result) ) ;
208+ }
209+
188210 let array: PrimitiveArray < T > = array
189211 . try_unary ( |x| {
190212 general_date_trunc ( T :: UNIT , x, parsed_tz, granularity. as_str ( ) )
@@ -423,6 +445,55 @@ fn date_trunc_coarse(granularity: &str, value: i64, tz: Option<Tz>) -> Result<i6
423445 Ok ( value. unwrap ( ) )
424446}
425447
448+ /// Fast path for fine granularities (hour and smaller) that can be handled
449+ /// with simple arithmetic operations without calendar complexity.
450+ ///
451+ /// This function is timezone-agnostic and should only be used when:
452+ /// - No timezone is specified in the input, OR
453+ /// - The granularity is less than hour as hour can be affected by DST transitions in some cases
454+ fn general_date_trunc_array_fine_granularity < T : ArrowTimestampType > (
455+ tu : TimeUnit ,
456+ array : & PrimitiveArray < T > ,
457+ granularity : & str ,
458+ ) -> Result < ArrayRef > {
459+ let unit = match ( tu, granularity) {
460+ ( Second , "minute" ) => Some ( Int64Array :: new_scalar ( 60 ) ) ,
461+ ( Second , "hour" ) => Some ( Int64Array :: new_scalar ( 3600 ) ) ,
462+ ( Second , "day" ) => Some ( Int64Array :: new_scalar ( 86400 ) ) ,
463+
464+ ( Millisecond , "second" ) => Some ( Int64Array :: new_scalar ( 1_000 ) ) ,
465+ ( Millisecond , "minute" ) => Some ( Int64Array :: new_scalar ( 60_000 ) ) ,
466+ ( Millisecond , "hour" ) => Some ( Int64Array :: new_scalar ( 3_600_000 ) ) ,
467+ ( Millisecond , "day" ) => Some ( Int64Array :: new_scalar ( 86_400_000 ) ) ,
468+
469+ ( Microsecond , "millisecond" ) => Some ( Int64Array :: new_scalar ( 1_000 ) ) ,
470+ ( Microsecond , "second" ) => Some ( Int64Array :: new_scalar ( 1_000_000 ) ) ,
471+ ( Microsecond , "minute" ) => Some ( Int64Array :: new_scalar ( 60_000_000 ) ) ,
472+ ( Microsecond , "hour" ) => Some ( Int64Array :: new_scalar ( 3_600_000_000 ) ) ,
473+ ( Microsecond , "day" ) => Some ( Int64Array :: new_scalar ( 86_400_000_000 ) ) ,
474+
475+ ( Nanosecond , "microsecond" ) => Some ( Int64Array :: new_scalar ( 1_000 ) ) ,
476+ ( Nanosecond , "millisecond" ) => Some ( Int64Array :: new_scalar ( 1_000_000 ) ) ,
477+ ( Nanosecond , "second" ) => Some ( Int64Array :: new_scalar ( 1_000_000_000 ) ) ,
478+ ( Nanosecond , "minute" ) => Some ( Int64Array :: new_scalar ( 60_000_000_000 ) ) ,
479+ ( Nanosecond , "hour" ) => Some ( Int64Array :: new_scalar ( 3_600_000_000_000 ) ) ,
480+ ( Nanosecond , "day" ) => Some ( Int64Array :: new_scalar ( 86_400_000_000_000 ) ) ,
481+ _ => None ,
482+ } ;
483+
484+ if let Some ( unit) = unit {
485+ let original_type = array. data_type ( ) ;
486+ let array = arrow:: compute:: cast ( array, & DataType :: Int64 ) ?;
487+ let array = arrow:: compute:: kernels:: numeric:: div ( & array, & unit) ?;
488+ let array = arrow:: compute:: kernels:: numeric:: mul ( & array, & unit) ?;
489+ let array = arrow:: compute:: cast ( & array, original_type) ?;
490+ Ok ( array)
491+ } else {
492+ // truncate to the same or smaller unit
493+ Ok ( Arc :: new ( array. clone ( ) ) )
494+ }
495+ }
496+
426497// truncates a single value with the given timeunit to the specified granularity
427498fn general_date_trunc (
428499 tu : TimeUnit ,
@@ -884,6 +955,21 @@ mod tests {
884955 "2018-11-04T02:00:00-02" ,
885956 ] ,
886957 ) ,
958+ (
959+ vec![
960+ "2024-10-26T23:30:00Z" ,
961+ "2024-10-27T00:30:00Z" ,
962+ "2024-10-27T01:30:00Z" ,
963+ "2024-10-27T02:30:00Z" ,
964+ ] ,
965+ Some ( "Asia/Kathmandu" . into( ) ) , // UTC+5:45
966+ vec![
967+ "2024-10-27T05:00:00+05:45" ,
968+ "2024-10-27T06:00:00+05:45" ,
969+ "2024-10-27T07:00:00+05:45" ,
970+ "2024-10-27T08:00:00+05:45" ,
971+ ] ,
972+ ) ,
887973 ] ;
888974
889975 cases. iter ( ) . for_each ( |( original, tz_opt, expected) | {
0 commit comments