Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 25 additions & 5 deletions datafusion/functions/src/datetime/date_part.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,16 @@ use arrow::datatypes::DataType::{
Date32, Date64, Duration, Interval, Time32, Time64, Timestamp,
};
use arrow::datatypes::TimeUnit::{Microsecond, Millisecond, Nanosecond, Second};
use arrow::datatypes::{DataType, Field, FieldRef, TimeUnit};
use arrow::datatypes::{
DataType, Field, FieldRef, IntervalUnit as ArrowIntervalUnit, TimeUnit,
};
use datafusion_common::types::{NativeType, logical_date};

use datafusion_common::{
Result, ScalarValue,
cast::{
as_date32_array, as_date64_array, as_int32_array, as_time32_millisecond_array,
as_date32_array, as_date64_array, as_int32_array, as_interval_dt_array,
as_interval_mdn_array, as_interval_ym_array, as_time32_millisecond_array,
as_time32_second_array, as_time64_microsecond_array, as_time64_nanosecond_array,
as_timestamp_microsecond_array, as_timestamp_millisecond_array,
as_timestamp_nanosecond_array, as_timestamp_second_array,
Expand All @@ -56,7 +59,7 @@ use datafusion_macros::user_doc;
argument(
name = "part",
description = r#"Part of the date to return. The following date parts are supported:

- year
- quarter (emits value in inclusive range [1, 4] based on which quartile of the year the date is in)
- month
Expand All @@ -70,7 +73,7 @@ use datafusion_macros::user_doc;
- nanosecond
- dow (day of the week where Sunday is 0)
- doy (day of the year)
- epoch (seconds since Unix epoch)
- epoch (seconds since Unix epoch for timestamps/dates, total seconds for intervals)
- isodow (day of the week where Monday is 0)
"#
),
Expand Down Expand Up @@ -349,6 +352,11 @@ fn seconds(array: &dyn Array, unit: TimeUnit) -> Result<ArrayRef> {

fn epoch(array: &dyn Array) -> Result<ArrayRef> {
const SECONDS_IN_A_DAY: f64 = 86400_f64;
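// Note: month-to-second conversion approximates every month as 30 days,
// the same DAYS_PER_MONTH constant PostgreSQL uses for interval epoch extraction;
// it does not represent exact calendar months (which vary from 28 to 31 days).
// NOTE(review): PostgreSQL additionally counts each full 12 months as 365.25 days
// (DAYS_PER_YEAR), so results may differ for intervals of a year or more — confirm.
// See: https://doxygen.postgresql.org/datatype_2timestamp_8h.html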
const DAYS_PER_MONTH: f64 = 30_f64;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suppose there's no easier way to define this for intervals 🤔

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes! I just followed PostgreSQL's approach for simplicity here. What do you think?

https://doxygen.postgresql.org/datatype_2timestamp_8h.html#ad35c6d425de4ccc4718c6ce7f4bfbba2


let f: Float64Array = match array.data_type() {
Timestamp(Second, _) => as_timestamp_second_array(array)?.unary(|x| x as f64),
Expand All @@ -373,7 +381,19 @@ fn epoch(array: &dyn Array) -> Result<ArrayRef> {
Time64(Nanosecond) => {
as_time64_nanosecond_array(array)?.unary(|x| x as f64 / 1_000_000_000_f64)
}
Interval(_) | Duration(_) => return seconds(array, Second),
Interval(ArrowIntervalUnit::YearMonth) => as_interval_ym_array(array)?
.unary(|x| x as f64 * DAYS_PER_MONTH * SECONDS_IN_A_DAY),
Interval(ArrowIntervalUnit::DayTime) => as_interval_dt_array(array)?.unary(|x| {
x.days as f64 * SECONDS_IN_A_DAY + x.milliseconds as f64 / 1_000_f64
}),
Interval(ArrowIntervalUnit::MonthDayNano) => {
as_interval_mdn_array(array)?.unary(|x| {
x.months as f64 * DAYS_PER_MONTH * SECONDS_IN_A_DAY
+ x.days as f64 * SECONDS_IN_A_DAY
+ x.nanoseconds as f64 / 1_000_000_000_f64
})
}
Duration(_) => return seconds(array, Second),
d => return exec_err!("Cannot convert {d:?} to epoch"),
};
Ok(Arc::new(f))
Expand Down
51 changes: 51 additions & 0 deletions datafusion/sqllogictest/test_files/datetime/date_part.slt
Original file line number Diff line number Diff line change
Expand Up @@ -936,6 +936,57 @@ SELECT extract(second from arrow_cast(NULL, 'Interval(MonthDayNano)'))
----
NULL

# extract epoch from intervals
query R
SELECT extract(epoch from interval '15 minutes')
----
900

query R
SELECT extract(epoch from interval '1 hour')
----
3600

query R
SELECT extract(epoch from interval '1 day')
----
86400

query R
SELECT extract(epoch from interval '1 month')
----
2592000

query R
SELECT extract(epoch from arrow_cast('3 days', 'Interval(DayTime)'))
----
259200

query R
SELECT extract(epoch from arrow_cast('100 milliseconds', 'Interval(MonthDayNano)'))
----
0.1

query R
SELECT extract(epoch from arrow_cast('500 microseconds', 'Interval(MonthDayNano)'))
----
0.0005

query R
SELECT extract(epoch from arrow_cast('2500 nanoseconds', 'Interval(MonthDayNano)'))
----
0.0000025

query R
SELECT extract(epoch from arrow_cast('1 month 2 days 500 milliseconds', 'Interval(MonthDayNano)'))
----
2764800.5

query R
SELECT extract(epoch from arrow_cast('2 months', 'Interval(YearMonth)'))
----
5184000

statement ok
create table t (id int, i interval) as values
(0, interval '5 months 1 day 10 nanoseconds'),
Expand Down
Loading