@@ -28,7 +28,7 @@ use arrow::array::types::{
2828 ArrowTimestampType , TimestampMicrosecondType , TimestampMillisecondType ,
2929 TimestampNanosecondType , TimestampSecondType ,
3030} ;
31- use arrow:: array:: { Array , ArrayRef , Int64Array , PrimitiveArray } ;
31+ use arrow:: array:: { Array , PrimitiveArray } ;
3232use arrow:: datatypes:: DataType :: { self , Null , Timestamp , Utf8 , Utf8View } ;
3333use arrow:: datatypes:: TimeUnit :: { self , Microsecond , Millisecond , Nanosecond , Second } ;
3434use datafusion_common:: cast:: as_primitive_array;
@@ -60,8 +60,6 @@ use chrono::{
6060 - hour / HOUR
6161 - minute / MINUTE
6262 - second / SECOND
63- - millisecond / MILLISECOND
64- - microsecond / MICROSECOND
6563"#
6664 ) ,
6765 argument(
@@ -187,26 +185,6 @@ impl ScalarUDFImpl for DateTruncFunc {
187185 ) -> Result < ColumnarValue > {
188186 let parsed_tz = parse_tz ( tz_opt) ?;
189187 let array = as_primitive_array :: < T > ( array) ?;
190-
191- // fast path for fine granularities
192- if matches ! (
193- granularity. as_str( ) ,
194- // For modern timezones, it's correct to truncate "minute" in this way.
195- // Both datafusion and arrow are ignoring historical timezone's non-minute granularity
196- // bias (e.g., Asia/Kathmandu before 1919 is UTC+05:41:16).
197- "second" | "minute" | "millisecond" | "microsecond"
198- ) ||
199- // In UTC, "hour" and "day" have uniform durations and can be truncated with simple arithmetic
200- ( parsed_tz. is_none ( ) && matches ! ( granularity. as_str( ) , "hour" | "day" ) )
201- {
202- let result = general_date_trunc_array_fine_granularity (
203- T :: UNIT ,
204- array,
205- granularity. as_str ( ) ,
206- ) ?;
207- return Ok ( ColumnarValue :: Array ( result) ) ;
208- }
209-
210188 let array: PrimitiveArray < T > = array
211189 . try_unary ( |x| {
212190 general_date_trunc ( T :: UNIT , x, parsed_tz, granularity. as_str ( ) )
@@ -445,55 +423,6 @@ fn date_trunc_coarse(granularity: &str, value: i64, tz: Option<Tz>) -> Result<i6
445423 Ok ( value. unwrap ( ) )
446424}
447425
448- /// Fast path for fine granularities (hour and smaller) that can be handled
449- /// with simple arithmetic operations without calendar complexity.
450- ///
451- /// This function is timezone-agnostic and should only be used when:
452- /// - No timezone is specified in the input, OR
453- /// - The granularity is less than hour as hour can be affected by DST transitions in some cases
454- fn general_date_trunc_array_fine_granularity < T : ArrowTimestampType > (
455- tu : TimeUnit ,
456- array : & PrimitiveArray < T > ,
457- granularity : & str ,
458- ) -> Result < ArrayRef > {
459- let unit = match ( tu, granularity) {
460- ( Second , "minute" ) => Some ( Int64Array :: new_scalar ( 60 ) ) ,
461- ( Second , "hour" ) => Some ( Int64Array :: new_scalar ( 3600 ) ) ,
462- ( Second , "day" ) => Some ( Int64Array :: new_scalar ( 86400 ) ) ,
463-
464- ( Millisecond , "second" ) => Some ( Int64Array :: new_scalar ( 1_000 ) ) ,
465- ( Millisecond , "minute" ) => Some ( Int64Array :: new_scalar ( 60_000 ) ) ,
466- ( Millisecond , "hour" ) => Some ( Int64Array :: new_scalar ( 3_600_000 ) ) ,
467- ( Millisecond , "day" ) => Some ( Int64Array :: new_scalar ( 86_400_000 ) ) ,
468-
469- ( Microsecond , "millisecond" ) => Some ( Int64Array :: new_scalar ( 1_000 ) ) ,
470- ( Microsecond , "second" ) => Some ( Int64Array :: new_scalar ( 1_000_000 ) ) ,
471- ( Microsecond , "minute" ) => Some ( Int64Array :: new_scalar ( 60_000_000 ) ) ,
472- ( Microsecond , "hour" ) => Some ( Int64Array :: new_scalar ( 3_600_000_000 ) ) ,
473- ( Microsecond , "day" ) => Some ( Int64Array :: new_scalar ( 86_400_000_000 ) ) ,
474-
475- ( Nanosecond , "microsecond" ) => Some ( Int64Array :: new_scalar ( 1_000 ) ) ,
476- ( Nanosecond , "millisecond" ) => Some ( Int64Array :: new_scalar ( 1_000_000 ) ) ,
477- ( Nanosecond , "second" ) => Some ( Int64Array :: new_scalar ( 1_000_000_000 ) ) ,
478- ( Nanosecond , "minute" ) => Some ( Int64Array :: new_scalar ( 60_000_000_000 ) ) ,
479- ( Nanosecond , "hour" ) => Some ( Int64Array :: new_scalar ( 3_600_000_000_000 ) ) ,
480- ( Nanosecond , "day" ) => Some ( Int64Array :: new_scalar ( 86_400_000_000_000 ) ) ,
481- _ => None ,
482- } ;
483-
484- if let Some ( unit) = unit {
485- let original_type = array. data_type ( ) ;
486- let array = arrow:: compute:: cast ( array, & DataType :: Int64 ) ?;
487- let array = arrow:: compute:: kernels:: numeric:: div ( & array, & unit) ?;
488- let array = arrow:: compute:: kernels:: numeric:: mul ( & array, & unit) ?;
489- let array = arrow:: compute:: cast ( & array, original_type) ?;
490- Ok ( array)
491- } else {
492- // truncate to the same or smaller unit
493- Ok ( Arc :: new ( array. clone ( ) ) )
494- }
495- }
496-
497426// truncates a single value with the given timeunit to the specified granularity
498427fn general_date_trunc (
499428 tu : TimeUnit ,
@@ -957,21 +886,6 @@ mod tests {
957886 "2018-11-04T02:00:00-02" ,
958887 ] ,
959888 ) ,
960- (
961- vec![
962- "2024-10-26T23:30:00Z" ,
963- "2024-10-27T00:30:00Z" ,
964- "2024-10-27T01:30:00Z" ,
965- "2024-10-27T02:30:00Z" ,
966- ] ,
967- Some ( "Asia/Kathmandu" . into( ) ) , // UTC+5:45
968- vec![
969- "2024-10-27T05:00:00+05:45" ,
970- "2024-10-27T06:00:00+05:45" ,
971- "2024-10-27T07:00:00+05:45" ,
972- "2024-10-27T08:00:00+05:45" ,
973- ] ,
974- ) ,
975889 ] ;
976890
977891 cases. iter ( ) . for_each ( |( original, tz_opt, expected) | {
0 commit comments