Skip to content

Commit c0627e2

Browse files
committed
Revert "speedup date_trunc (~7x faster) in some cases (apache#16859)"
apache#16859 contains a regression. This reverts commit 764d547 (apache#16859), pending resolution of apache#18334.
1 parent 15d1a43 commit c0627e2

File tree

2 files changed: 1 addition & 89 deletions

datafusion/functions/src/datetime/date_trunc.rs

Lines changed: 1 addition & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ use arrow::array::types::{
2828
ArrowTimestampType, TimestampMicrosecondType, TimestampMillisecondType,
2929
TimestampNanosecondType, TimestampSecondType,
3030
};
31-
use arrow::array::{Array, ArrayRef, Int64Array, PrimitiveArray};
31+
use arrow::array::{Array, PrimitiveArray};
3232
use arrow::datatypes::DataType::{self, Null, Timestamp, Utf8, Utf8View};
3333
use arrow::datatypes::TimeUnit::{self, Microsecond, Millisecond, Nanosecond, Second};
3434
use datafusion_common::cast::as_primitive_array;
@@ -60,8 +60,6 @@ use chrono::{
6060
- hour / HOUR
6161
- minute / MINUTE
6262
- second / SECOND
63-
- millisecond / MILLISECOND
64-
- microsecond / MICROSECOND
6563
"#
6664
),
6765
argument(
@@ -187,26 +185,6 @@ impl ScalarUDFImpl for DateTruncFunc {
187185
) -> Result<ColumnarValue> {
188186
let parsed_tz = parse_tz(tz_opt)?;
189187
let array = as_primitive_array::<T>(array)?;
190-
191-
// fast path for fine granularities
192-
if matches!(
193-
granularity.as_str(),
194-
// For modern timezones, it's correct to truncate "minute" in this way.
195-
// Both datafusion and arrow are ignoring historical timezone's non-minute granularity
196-
// bias (e.g., Asia/Kathmandu before 1919 is UTC+05:41:16).
197-
"second" | "minute" | "millisecond" | "microsecond"
198-
) ||
199-
// In UTC, "hour" and "day" have uniform durations and can be truncated with simple arithmetic
200-
(parsed_tz.is_none() && matches!(granularity.as_str(), "hour" | "day"))
201-
{
202-
let result = general_date_trunc_array_fine_granularity(
203-
T::UNIT,
204-
array,
205-
granularity.as_str(),
206-
)?;
207-
return Ok(ColumnarValue::Array(result));
208-
}
209-
210188
let array: PrimitiveArray<T> = array
211189
.try_unary(|x| {
212190
general_date_trunc(T::UNIT, x, parsed_tz, granularity.as_str())
@@ -445,55 +423,6 @@ fn date_trunc_coarse(granularity: &str, value: i64, tz: Option<Tz>) -> Result<i6
445423
Ok(value.unwrap())
446424
}
447425

448-
/// Fast path for fine granularities (hour and smaller) that can be handled
449-
/// with simple arithmetic operations without calendar complexity.
450-
///
451-
/// This function is timezone-agnostic and should only be used when:
452-
/// - No timezone is specified in the input, OR
453-
/// - The granularity is less than hour as hour can be affected by DST transitions in some cases
454-
fn general_date_trunc_array_fine_granularity<T: ArrowTimestampType>(
455-
tu: TimeUnit,
456-
array: &PrimitiveArray<T>,
457-
granularity: &str,
458-
) -> Result<ArrayRef> {
459-
let unit = match (tu, granularity) {
460-
(Second, "minute") => Some(Int64Array::new_scalar(60)),
461-
(Second, "hour") => Some(Int64Array::new_scalar(3600)),
462-
(Second, "day") => Some(Int64Array::new_scalar(86400)),
463-
464-
(Millisecond, "second") => Some(Int64Array::new_scalar(1_000)),
465-
(Millisecond, "minute") => Some(Int64Array::new_scalar(60_000)),
466-
(Millisecond, "hour") => Some(Int64Array::new_scalar(3_600_000)),
467-
(Millisecond, "day") => Some(Int64Array::new_scalar(86_400_000)),
468-
469-
(Microsecond, "millisecond") => Some(Int64Array::new_scalar(1_000)),
470-
(Microsecond, "second") => Some(Int64Array::new_scalar(1_000_000)),
471-
(Microsecond, "minute") => Some(Int64Array::new_scalar(60_000_000)),
472-
(Microsecond, "hour") => Some(Int64Array::new_scalar(3_600_000_000)),
473-
(Microsecond, "day") => Some(Int64Array::new_scalar(86_400_000_000)),
474-
475-
(Nanosecond, "microsecond") => Some(Int64Array::new_scalar(1_000)),
476-
(Nanosecond, "millisecond") => Some(Int64Array::new_scalar(1_000_000)),
477-
(Nanosecond, "second") => Some(Int64Array::new_scalar(1_000_000_000)),
478-
(Nanosecond, "minute") => Some(Int64Array::new_scalar(60_000_000_000)),
479-
(Nanosecond, "hour") => Some(Int64Array::new_scalar(3_600_000_000_000)),
480-
(Nanosecond, "day") => Some(Int64Array::new_scalar(86_400_000_000_000)),
481-
_ => None,
482-
};
483-
484-
if let Some(unit) = unit {
485-
let original_type = array.data_type();
486-
let array = arrow::compute::cast(array, &DataType::Int64)?;
487-
let array = arrow::compute::kernels::numeric::div(&array, &unit)?;
488-
let array = arrow::compute::kernels::numeric::mul(&array, &unit)?;
489-
let array = arrow::compute::cast(&array, original_type)?;
490-
Ok(array)
491-
} else {
492-
// truncate to the same or smaller unit
493-
Ok(Arc::new(array.clone()))
494-
}
495-
}
496-
497426
// truncates a single value with the given timeunit to the specified granularity
498427
fn general_date_trunc(
499428
tu: TimeUnit,
@@ -957,21 +886,6 @@ mod tests {
957886
"2018-11-04T02:00:00-02",
958887
],
959888
),
960-
(
961-
vec![
962-
"2024-10-26T23:30:00Z",
963-
"2024-10-27T00:30:00Z",
964-
"2024-10-27T01:30:00Z",
965-
"2024-10-27T02:30:00Z",
966-
],
967-
Some("Asia/Kathmandu".into()), // UTC+5:45
968-
vec![
969-
"2024-10-27T05:00:00+05:45",
970-
"2024-10-27T06:00:00+05:45",
971-
"2024-10-27T07:00:00+05:45",
972-
"2024-10-27T08:00:00+05:45",
973-
],
974-
),
975889
];
976890

977891
cases.iter().for_each(|(original, tz_opt, expected)| {

docs/source/user-guide/sql/scalar_functions.md

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2151,8 +2151,6 @@ date_trunc(precision, expression)
21512151
- hour / HOUR
21522152
- minute / MINUTE
21532153
- second / SECOND
2154-
- millisecond / MILLISECOND
2155-
- microsecond / MICROSECOND
21562154

21572155
- **expression**: Time expression to operate on. Can be a constant, column, or function.
21582156

0 commit comments

Comments
 (0)