Skip to content

Commit 646213e

Browse files
feat: add Time type support to date_trunc function (#19640)
## Which issue does this PR close? <!-- We generally require a GitHub issue to be filed for all bug fixes and enhancements and this helps us generate change logs for our releases. You can link an issue to this PR using the GitHub syntax. For example `Closes #123` indicates that this PR will close issue #123. --> - Part of #19025. ## Rationale for this change <!-- Why are you proposing this change? If this is already explained clearly in the issue then this section is not needed. Explaining clearly why changes are proposed helps reviewers understand your changes and offer better suggestions for fixes. --> ## What changes are included in this PR? - Added Time64/Time32 signatures to date_trunc - Added time truncation logic (hour, minute, second, millisecond, microsecond) - Error for invalid granularities (day, week, month, quarter, year) <!-- There is no need to duplicate the description in the issue here but it is sometimes worth providing a summary of the individual changes in this PR. --> ## Are these changes tested? Yes <!-- We typically require tests for all PRs in order to: 1. Prevent the code from being accidentally broken by subsequent changes 2. Serve as another way to document the expected behavior of the code If tests are not included in your PR, please explain why (for example, are they covered by existing tests)? --> ## Are there any user-facing changes? <!-- If there are user-facing changes then we may require documentation to be updated before approving the PR. --> <!-- If there are any breaking changes to public APIs, please add the `api change` label. --> --------- Co-authored-by: Jeffrey Vo <[email protected]>
1 parent 35ff4ab commit 646213e

File tree

4 files changed

+285
-121
lines changed

4 files changed

+285
-121
lines changed

datafusion/functions/src/datetime/date_trunc.rs

Lines changed: 200 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -22,25 +22,28 @@ use std::str::FromStr;
2222
use std::sync::Arc;
2323

2424
use arrow::array::temporal_conversions::{
25-
as_datetime_with_timezone, timestamp_ns_to_datetime,
25+
MICROSECONDS, MILLISECONDS, NANOSECONDS, as_datetime_with_timezone,
26+
timestamp_ns_to_datetime,
2627
};
2728
use arrow::array::timezone::Tz;
2829
use arrow::array::types::{
29-
ArrowTimestampType, TimestampMicrosecondType, TimestampMillisecondType,
30+
ArrowTimestampType, Time32MillisecondType, Time32SecondType, Time64MicrosecondType,
31+
Time64NanosecondType, TimestampMicrosecondType, TimestampMillisecondType,
3032
TimestampNanosecondType, TimestampSecondType,
3133
};
3234
use arrow::array::{Array, ArrayRef, PrimitiveArray};
33-
use arrow::datatypes::DataType::{self, Null, Timestamp, Utf8, Utf8View};
35+
use arrow::datatypes::DataType::{self, Time32, Time64, Timestamp};
3436
use arrow::datatypes::TimeUnit::{self, Microsecond, Millisecond, Nanosecond, Second};
3537
use datafusion_common::cast::as_primitive_array;
38+
use datafusion_common::types::{NativeType, logical_date, logical_string};
3639
use datafusion_common::{
37-
DataFusionError, Result, ScalarValue, exec_datafusion_err, exec_err, plan_err,
40+
DataFusionError, Result, ScalarValue, exec_datafusion_err, exec_err,
3841
};
39-
use datafusion_expr::TypeSignature::Exact;
4042
use datafusion_expr::sort_properties::{ExprProperties, SortProperties};
4143
use datafusion_expr::{
42-
ColumnarValue, Documentation, ScalarUDFImpl, Signature, TIMEZONE_WILDCARD, Volatility,
44+
ColumnarValue, Documentation, ScalarUDFImpl, Signature, TypeSignature, Volatility,
4345
};
46+
use datafusion_expr_common::signature::{Coercion, TypeSignatureClass};
4447
use datafusion_macros::user_doc;
4548

4649
use chrono::{
@@ -116,16 +119,30 @@ impl DateTruncGranularity {
116119
fn is_fine_granularity_utc(&self) -> bool {
117120
self.is_fine_granularity() || matches!(self, Self::Hour | Self::Day)
118121
}
122+
123+
/// Returns true if this granularity is valid for Time types
124+
/// Time types don't have date components, so day/week/month/quarter/year are not valid
125+
fn valid_for_time(&self) -> bool {
126+
matches!(
127+
self,
128+
Self::Hour
129+
| Self::Minute
130+
| Self::Second
131+
| Self::Millisecond
132+
| Self::Microsecond
133+
)
134+
}
119135
}
120136

121137
#[user_doc(
122138
doc_section(label = "Time and Date Functions"),
123-
description = "Truncates a timestamp value to a specified precision.",
139+
description = "Truncates a timestamp or time value to a specified precision.",
124140
syntax_example = "date_trunc(precision, expression)",
125141
argument(
126142
name = "precision",
127143
description = r#"Time precision to truncate to. The following precisions are supported:
128144
145+
For Timestamp types:
129146
- year / YEAR
130147
- quarter / QUARTER
131148
- month / MONTH
@@ -136,11 +153,18 @@ impl DateTruncGranularity {
136153
- second / SECOND
137154
- millisecond / MILLISECOND
138155
- microsecond / MICROSECOND
156+
157+
For Time types (hour, minute, second, millisecond, microsecond only):
158+
- hour / HOUR
159+
- minute / MINUTE
160+
- second / SECOND
161+
- millisecond / MILLISECOND
162+
- microsecond / MICROSECOND
139163
"#
140164
),
141165
argument(
142166
name = "expression",
143-
description = "Time expression to operate on. Can be a constant, column, or function."
167+
description = "Timestamp or time expression to operate on. Can be a constant, column, or function."
144168
)
145169
)]
146170
#[derive(Debug, PartialEq, Eq, Hash)]
@@ -160,45 +184,21 @@ impl DateTruncFunc {
160184
Self {
161185
signature: Signature::one_of(
162186
vec![
163-
Exact(vec![Utf8, Timestamp(Nanosecond, None)]),
164-
Exact(vec![Utf8View, Timestamp(Nanosecond, None)]),
165-
Exact(vec![
166-
Utf8,
167-
Timestamp(Nanosecond, Some(TIMEZONE_WILDCARD.into())),
168-
]),
169-
Exact(vec![
170-
Utf8View,
171-
Timestamp(Nanosecond, Some(TIMEZONE_WILDCARD.into())),
172-
]),
173-
Exact(vec![Utf8, Timestamp(Microsecond, None)]),
174-
Exact(vec![Utf8View, Timestamp(Microsecond, None)]),
175-
Exact(vec![
176-
Utf8,
177-
Timestamp(Microsecond, Some(TIMEZONE_WILDCARD.into())),
187+
TypeSignature::Coercible(vec![
188+
Coercion::new_exact(TypeSignatureClass::Native(logical_string())),
189+
Coercion::new_implicit(
190+
TypeSignatureClass::Timestamp,
191+
// Allow implicit cast from string and date to timestamp for backward compatibility
192+
vec![
193+
TypeSignatureClass::Native(logical_string()),
194+
TypeSignatureClass::Native(logical_date()),
195+
],
196+
NativeType::Timestamp(Nanosecond, None),
197+
),
178198
]),
179-
Exact(vec![
180-
Utf8View,
181-
Timestamp(Microsecond, Some(TIMEZONE_WILDCARD.into())),
182-
]),
183-
Exact(vec![Utf8, Timestamp(Millisecond, None)]),
184-
Exact(vec![Utf8View, Timestamp(Millisecond, None)]),
185-
Exact(vec![
186-
Utf8,
187-
Timestamp(Millisecond, Some(TIMEZONE_WILDCARD.into())),
188-
]),
189-
Exact(vec![
190-
Utf8View,
191-
Timestamp(Millisecond, Some(TIMEZONE_WILDCARD.into())),
192-
]),
193-
Exact(vec![Utf8, Timestamp(Second, None)]),
194-
Exact(vec![Utf8View, Timestamp(Second, None)]),
195-
Exact(vec![
196-
Utf8,
197-
Timestamp(Second, Some(TIMEZONE_WILDCARD.into())),
198-
]),
199-
Exact(vec![
200-
Utf8View,
201-
Timestamp(Second, Some(TIMEZONE_WILDCARD.into())),
199+
TypeSignature::Coercible(vec![
200+
Coercion::new_exact(TypeSignatureClass::Native(logical_string())),
201+
Coercion::new_exact(TypeSignatureClass::Time),
202202
]),
203203
],
204204
Volatility::Immutable,
@@ -222,17 +222,10 @@ impl ScalarUDFImpl for DateTruncFunc {
222222
}
223223

224224
fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
225-
match &arg_types[1] {
226-
Timestamp(Nanosecond, None) | Utf8 | DataType::Date32 | Null => {
227-
Ok(Timestamp(Nanosecond, None))
228-
}
229-
Timestamp(Nanosecond, tz_opt) => Ok(Timestamp(Nanosecond, tz_opt.clone())),
230-
Timestamp(Microsecond, tz_opt) => Ok(Timestamp(Microsecond, tz_opt.clone())),
231-
Timestamp(Millisecond, tz_opt) => Ok(Timestamp(Millisecond, tz_opt.clone())),
232-
Timestamp(Second, tz_opt) => Ok(Timestamp(Second, tz_opt.clone())),
233-
_ => plan_err!(
234-
"The date_trunc function can only accept timestamp as the second arg."
235-
),
225+
if arg_types[1].is_null() {
226+
Ok(Timestamp(Nanosecond, None))
227+
} else {
228+
Ok(arg_types[1].clone())
236229
}
237230
}
238231

@@ -248,6 +241,9 @@ impl ScalarUDFImpl for DateTruncFunc {
248241
{
249242
v.to_lowercase()
250243
} else if let ColumnarValue::Scalar(ScalarValue::Utf8View(Some(v))) = granularity
244+
{
245+
v.to_lowercase()
246+
} else if let ColumnarValue::Scalar(ScalarValue::LargeUtf8(Some(v))) = granularity
251247
{
252248
v.to_lowercase()
253249
} else {
@@ -256,6 +252,15 @@ impl ScalarUDFImpl for DateTruncFunc {
256252

257253
let granularity = DateTruncGranularity::from_str(&granularity_str)?;
258254

255+
// Check upfront if granularity is valid for Time types
256+
let is_time_type = matches!(array.data_type(), Time64(_) | Time32(_));
257+
if is_time_type && !granularity.valid_for_time() {
258+
return exec_err!(
259+
"date_trunc does not support '{}' granularity for Time types. Valid values are: hour, minute, second, millisecond, microsecond",
260+
granularity_str
261+
);
262+
}
263+
259264
fn process_array<T: ArrowTimestampType>(
260265
array: &dyn Array,
261266
granularity: DateTruncGranularity,
@@ -303,6 +308,10 @@ impl ScalarUDFImpl for DateTruncFunc {
303308
}
304309

305310
Ok(match array {
311+
ColumnarValue::Scalar(ScalarValue::Null) => {
312+
// NULL input returns NULL timestamp
313+
ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(None, None))
314+
}
306315
ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(v, tz_opt)) => {
307316
process_scalar::<TimestampNanosecondType>(v, granularity, tz_opt)?
308317
}
@@ -315,40 +324,77 @@ impl ScalarUDFImpl for DateTruncFunc {
315324
ColumnarValue::Scalar(ScalarValue::TimestampSecond(v, tz_opt)) => {
316325
process_scalar::<TimestampSecondType>(v, granularity, tz_opt)?
317326
}
327+
ColumnarValue::Scalar(ScalarValue::Time64Nanosecond(v)) => {
328+
let truncated = v.map(|val| truncate_time_nanos(val, granularity));
329+
ColumnarValue::Scalar(ScalarValue::Time64Nanosecond(truncated))
330+
}
331+
ColumnarValue::Scalar(ScalarValue::Time64Microsecond(v)) => {
332+
let truncated = v.map(|val| truncate_time_micros(val, granularity));
333+
ColumnarValue::Scalar(ScalarValue::Time64Microsecond(truncated))
334+
}
335+
ColumnarValue::Scalar(ScalarValue::Time32Millisecond(v)) => {
336+
let truncated = v.map(|val| truncate_time_millis(val, granularity));
337+
ColumnarValue::Scalar(ScalarValue::Time32Millisecond(truncated))
338+
}
339+
ColumnarValue::Scalar(ScalarValue::Time32Second(v)) => {
340+
let truncated = v.map(|val| truncate_time_secs(val, granularity));
341+
ColumnarValue::Scalar(ScalarValue::Time32Second(truncated))
342+
}
318343
ColumnarValue::Array(array) => {
319344
let array_type = array.data_type();
320-
if let Timestamp(unit, tz_opt) = array_type {
321-
match unit {
322-
Second => process_array::<TimestampSecondType>(
323-
array,
324-
granularity,
325-
tz_opt,
326-
)?,
327-
Millisecond => process_array::<TimestampMillisecondType>(
328-
array,
329-
granularity,
330-
tz_opt,
331-
)?,
332-
Microsecond => process_array::<TimestampMicrosecondType>(
333-
array,
334-
granularity,
335-
tz_opt,
336-
)?,
337-
Nanosecond => process_array::<TimestampNanosecondType>(
338-
array,
339-
granularity,
340-
tz_opt,
341-
)?,
345+
match array_type {
346+
Timestamp(Second, tz_opt) => {
347+
process_array::<TimestampSecondType>(array, granularity, tz_opt)?
348+
}
349+
Timestamp(Millisecond, tz_opt) => process_array::<
350+
TimestampMillisecondType,
351+
>(
352+
array, granularity, tz_opt
353+
)?,
354+
Timestamp(Microsecond, tz_opt) => process_array::<
355+
TimestampMicrosecondType,
356+
>(
357+
array, granularity, tz_opt
358+
)?,
359+
Timestamp(Nanosecond, tz_opt) => process_array::<
360+
TimestampNanosecondType,
361+
>(
362+
array, granularity, tz_opt
363+
)?,
364+
Time64(Nanosecond) => {
365+
let arr = as_primitive_array::<Time64NanosecondType>(array)?;
366+
let result: PrimitiveArray<Time64NanosecondType> =
367+
arr.unary(|v| truncate_time_nanos(v, granularity));
368+
ColumnarValue::Array(Arc::new(result))
369+
}
370+
Time64(Microsecond) => {
371+
let arr = as_primitive_array::<Time64MicrosecondType>(array)?;
372+
let result: PrimitiveArray<Time64MicrosecondType> =
373+
arr.unary(|v| truncate_time_micros(v, granularity));
374+
ColumnarValue::Array(Arc::new(result))
375+
}
376+
Time32(Millisecond) => {
377+
let arr = as_primitive_array::<Time32MillisecondType>(array)?;
378+
let result: PrimitiveArray<Time32MillisecondType> =
379+
arr.unary(|v| truncate_time_millis(v, granularity));
380+
ColumnarValue::Array(Arc::new(result))
381+
}
382+
Time32(Second) => {
383+
let arr = as_primitive_array::<Time32SecondType>(array)?;
384+
let result: PrimitiveArray<Time32SecondType> =
385+
arr.unary(|v| truncate_time_secs(v, granularity));
386+
ColumnarValue::Array(Arc::new(result))
387+
}
388+
_ => {
389+
return exec_err!(
390+
"second argument of `date_trunc` is an unsupported array type: {array_type}"
391+
);
342392
}
343-
} else {
344-
return exec_err!(
345-
"second argument of `date_trunc` is an unsupported array type: {array_type}"
346-
);
347393
}
348394
}
349395
_ => {
350396
return exec_err!(
351-
"second argument of `date_trunc` must be timestamp scalar or array"
397+
"second argument of `date_trunc` must be timestamp, time scalar or array"
352398
);
353399
}
354400
})
@@ -374,6 +420,76 @@ impl ScalarUDFImpl for DateTruncFunc {
374420
}
375421
}
376422

423+
const NANOS_PER_MICROSECOND: i64 = NANOSECONDS / MICROSECONDS;
424+
const NANOS_PER_MILLISECOND: i64 = NANOSECONDS / MILLISECONDS;
425+
const NANOS_PER_SECOND: i64 = NANOSECONDS;
426+
const NANOS_PER_MINUTE: i64 = 60 * NANOS_PER_SECOND;
427+
const NANOS_PER_HOUR: i64 = 60 * NANOS_PER_MINUTE;
428+
429+
const MICROS_PER_MILLISECOND: i64 = MICROSECONDS / MILLISECONDS;
430+
const MICROS_PER_SECOND: i64 = MICROSECONDS;
431+
const MICROS_PER_MINUTE: i64 = 60 * MICROS_PER_SECOND;
432+
const MICROS_PER_HOUR: i64 = 60 * MICROS_PER_MINUTE;
433+
434+
const MILLIS_PER_SECOND: i32 = MILLISECONDS as i32;
435+
const MILLIS_PER_MINUTE: i32 = 60 * MILLIS_PER_SECOND;
436+
const MILLIS_PER_HOUR: i32 = 60 * MILLIS_PER_MINUTE;
437+
438+
const SECS_PER_MINUTE: i32 = 60;
439+
const SECS_PER_HOUR: i32 = 60 * SECS_PER_MINUTE;
440+
441+
/// Truncate time in nanoseconds to the specified granularity
442+
fn truncate_time_nanos(value: i64, granularity: DateTruncGranularity) -> i64 {
443+
match granularity {
444+
DateTruncGranularity::Hour => value - (value % NANOS_PER_HOUR),
445+
DateTruncGranularity::Minute => value - (value % NANOS_PER_MINUTE),
446+
DateTruncGranularity::Second => value - (value % NANOS_PER_SECOND),
447+
DateTruncGranularity::Millisecond => value - (value % NANOS_PER_MILLISECOND),
448+
DateTruncGranularity::Microsecond => value - (value % NANOS_PER_MICROSECOND),
449+
// Other granularities are not valid for time - should be caught earlier
450+
_ => value,
451+
}
452+
}
453+
454+
/// Truncate time in microseconds to the specified granularity
455+
fn truncate_time_micros(value: i64, granularity: DateTruncGranularity) -> i64 {
456+
match granularity {
457+
DateTruncGranularity::Hour => value - (value % MICROS_PER_HOUR),
458+
DateTruncGranularity::Minute => value - (value % MICROS_PER_MINUTE),
459+
DateTruncGranularity::Second => value - (value % MICROS_PER_SECOND),
460+
DateTruncGranularity::Millisecond => value - (value % MICROS_PER_MILLISECOND),
461+
DateTruncGranularity::Microsecond => value, // Already at microsecond precision
462+
// Other granularities are not valid for time
463+
_ => value,
464+
}
465+
}
466+
467+
/// Truncate time in milliseconds to the specified granularity
468+
fn truncate_time_millis(value: i32, granularity: DateTruncGranularity) -> i32 {
469+
match granularity {
470+
DateTruncGranularity::Hour => value - (value % MILLIS_PER_HOUR),
471+
DateTruncGranularity::Minute => value - (value % MILLIS_PER_MINUTE),
472+
DateTruncGranularity::Second => value - (value % MILLIS_PER_SECOND),
473+
DateTruncGranularity::Millisecond => value, // Already at millisecond precision
474+
DateTruncGranularity::Microsecond => value, // Can't truncate to finer precision
475+
// Other granularities are not valid for time
476+
_ => value,
477+
}
478+
}
479+
480+
/// Truncate time in seconds to the specified granularity
481+
fn truncate_time_secs(value: i32, granularity: DateTruncGranularity) -> i32 {
482+
match granularity {
483+
DateTruncGranularity::Hour => value - (value % SECS_PER_HOUR),
484+
DateTruncGranularity::Minute => value - (value % SECS_PER_MINUTE),
485+
DateTruncGranularity::Second => value, // Already at second precision
486+
DateTruncGranularity::Millisecond => value, // Can't truncate to finer precision
487+
DateTruncGranularity::Microsecond => value, // Can't truncate to finer precision
488+
// Other granularities are not valid for time
489+
_ => value,
490+
}
491+
}
492+
377493
fn _date_trunc_coarse<T>(
378494
granularity: DateTruncGranularity,
379495
value: Option<T>,

0 commit comments

Comments
 (0)