diff --git a/datafusion/core/tests/sql/expr.rs b/datafusion/core/tests/sql/expr.rs index ea7b14a71d5a..8cd6b7c4cf9b 100644 --- a/datafusion/core/tests/sql/expr.rs +++ b/datafusion/core/tests/sql/expr.rs @@ -1124,11 +1124,19 @@ async fn test_extract_date_part() -> Result<()> { "EXTRACT(year FROM to_timestamp('2020-09-08T12:00:00+00:00'))", "2020" ); + test_expression!( + "date_part('YEAR', INTERVAL '1 year 2 month 3 day 4 hour 5 minute 6 second')", + "1" + ); test_expression!("date_part('MONTH', CAST('2000-01-01' AS DATE))", "1"); test_expression!( "EXTRACT(month FROM to_timestamp('2020-09-08T12:00:00+00:00'))", "9" ); + test_expression!( + "date_part('MONTH', INTERVAL '1 year 2 month 3 day 4 hour 5 minute 6 second')", + "2" + ); test_expression!("date_part('WEEK', CAST('2003-01-01' AS DATE))", "1"); // TODO Creating logical plan for 'SELECT EXTRACT(WEEK FROM to_timestamp('2020-09-08T12:00:00+00:00'))' @@ -1144,11 +1152,19 @@ async fn test_extract_date_part() -> Result<()> { "EXTRACT(day FROM to_timestamp('2020-09-08T12:00:00+00:00'))", "8" ); + test_expression!( + "date_part('DAY', INTERVAL '1 year 2 month 3 day 4 hour 5 minute 6 second')", + "3" + ); test_expression!("date_part('HOUR', CAST('2000-01-01' AS DATE))", "0"); test_expression!( "EXTRACT(hour FROM to_timestamp('2020-09-08T12:03:03+00:00'))", "12" ); + test_expression!( + "date_part('HOUR', INTERVAL '1 year 2 month 3 day 4 hour 5 minute 6 second')", + "4" + ); test_expression!( "EXTRACT(minute FROM to_timestamp('2020-09-08T12:12:00+00:00'))", "12" @@ -1157,6 +1173,10 @@ async fn test_extract_date_part() -> Result<()> { "date_part('minute', to_timestamp('2020-09-08T12:12:00+00:00'))", "12" ); + test_expression!( + "date_part('MINUTE', INTERVAL '1 year 2 month 3 day 4 hour 5 minute 6 second')", + "5" + ); test_expression!( "EXTRACT(second FROM to_timestamp('2020-09-08T12:00:12+00:00'))", "12" @@ -1165,6 +1185,10 @@ async fn test_extract_date_part() -> Result<()> { "date_part('second', to_timestamp('2020-09-08T12:00:12+00:00'))", "12" ); + test_expression!( + "date_part('SECOND', INTERVAL '1 year 2 month 3 day 4 hour 5 minute 6 second')", + "6" + ); // DOY test_expression!( diff --git a/datafusion/cube_ext/src/temporal.rs b/datafusion/cube_ext/src/temporal.rs index 2cf97b539a4a..275a5c0837d7 100644 --- a/datafusion/cube_ext/src/temporal.rs +++ b/datafusion/cube_ext/src/temporal.rs @@ -15,8 +15,13 @@ // specific language governing permissions and limitations // under the License. -use arrow::array::{Array, Float64Array, Int32Array, Int32Builder, PrimitiveArray}; -use arrow::compute::kernels::arity::unary; +use arrow::array::{ + Array, Date32Array, Date64Array, Float64Array, Int32Array, Int32Builder, + IntervalDayTimeArray, IntervalMonthDayNanoArray, IntervalYearMonthArray, + PrimitiveArray, TimestampMicrosecondArray, TimestampMillisecondArray, + TimestampNanosecondArray, TimestampSecondArray, +}; +use arrow::compute::kernels::{arity::unary, temporal as arrow_temporal}; use arrow::datatypes::{ ArrowNumericType, ArrowPrimitiveType, ArrowTemporalType, DataType, Date32Type, Date64Type, Float64Type, IntervalDayTimeType, IntervalMonthDayNanoType, @@ -295,3 +300,266 @@ where Ok(b.finish()) } + +/// This macro will generate a trait `DatePartable` that is automatically implemented +/// for all Arrow temporal types. +macro_rules! date_partable { + ($($gran:ident),*) => { + pub trait DatePartable: ArrowPrimitiveType + Sized { + $( + fn $gran(array: &PrimitiveArray) -> Result; + )* + } + + impl DatePartable for TimestampSecondType { + $( + fn $gran(array: &TimestampSecondArray) -> Result { + arrow_temporal::$gran(array) + } + )* + } + + impl DatePartable for TimestampMillisecondType { + $( + fn $gran(array: &TimestampMillisecondArray) -> Result { + arrow_temporal::$gran(array) + } + )* + } + + impl DatePartable for TimestampMicrosecondType { + $( + fn $gran(array: &TimestampMicrosecondArray) -> Result { + arrow_temporal::$gran(array) + } + )* + } + + impl DatePartable for TimestampNanosecondType { + $( + fn $gran(array: &TimestampNanosecondArray) -> Result { + arrow_temporal::$gran(array) + } + )* + } + + impl DatePartable for Date32Type { + $( + fn $gran(array: &Date32Array) -> Result { + arrow_temporal::$gran(array) + } + )* + } + + impl DatePartable for Date64Type { + $( + fn $gran(array: &Date64Array) -> Result { + arrow_temporal::$gran(array) + } + )* + } + + $( + pub fn $gran(array: &PrimitiveArray) -> Result + where + T: DatePartable, + { + DatePartable::$gran(array) + } + )* + }; +} + +date_partable!(year, quarter, month, day, hour, minute, second); + +fn interval_year_month_op( + array: &IntervalYearMonthArray, + op: fn(i32) -> Result, +) -> Result { + let mut builder = Int32Builder::new(array.len()); + for i in 0..array.len() { + if array.is_null(i) { + builder.append_null()?; + continue; + } + let value = array.value(i); + let result = op(value)?; + builder.append_value(result)?; + } + Ok(builder.finish()) +} + +fn interval_day_time_op( + array: &IntervalDayTimeArray, + op: fn(i64) -> Result, +) -> Result { + let mut builder = Int32Builder::new(array.len()); + for i in 0..array.len() { + if array.is_null(i) { + builder.append_null()?; + continue; + } + let value = array.value(i); + let result = op(value)?; + builder.append_value(result)?; + } + Ok(builder.finish()) +} + +fn interval_month_day_nano_op( + array: &IntervalMonthDayNanoArray, + op: fn(i128) -> Result, +) -> Result { + let mut builder = Int32Builder::new(array.len()); + for i in 0..array.len() { + if array.is_null(i) { + builder.append_null()?; + continue; + } + let value = array.value(i); + let result = op(value)?; + builder.append_value(result)?; + } + Ok(builder.finish()) +} + +impl DatePartable for IntervalYearMonthType { + fn year(array: &IntervalYearMonthArray) -> Result { + interval_year_month_op(array, |v| Ok(v / 12)) + } + + fn quarter(array: &IntervalYearMonthArray) -> Result { + interval_year_month_op(array, |v| Ok(v % 12 / 3 + 1)) + } + + fn month(array: &IntervalYearMonthArray) -> Result { + interval_year_month_op(array, |v| Ok(v % 12)) + } + + fn day(array: &IntervalYearMonthArray) -> Result { + interval_year_month_op(array, |_| Ok(0)) + } + + fn hour(array: &IntervalYearMonthArray) -> Result { + interval_year_month_op(array, |_| Ok(0)) + } + + fn minute(array: &IntervalYearMonthArray) -> Result { + interval_year_month_op(array, |_| Ok(0)) + } + + fn second(array: &IntervalYearMonthArray) -> Result { + interval_year_month_op(array, |_| Ok(0)) + } +} + +impl DatePartable for IntervalDayTimeType { + fn year(array: &IntervalDayTimeArray) -> Result { + interval_day_time_op(array, |_| Ok(0)) + } + + fn quarter(array: &IntervalDayTimeArray) -> Result { + interval_day_time_op(array, |_| Ok(1)) + } + + fn month(array: &IntervalDayTimeArray) -> Result { + interval_day_time_op(array, |_| Ok(0)) + } + + fn day(array: &IntervalDayTimeArray) -> Result { + interval_day_time_op(array, |v| { + let (days, _) = IntervalDayTimeType::to_parts(v); + Ok(days) + }) + } + + fn hour(array: &IntervalDayTimeArray) -> Result { + interval_day_time_op(array, |v| { + let (_, millis) = IntervalDayTimeType::to_parts(v); + Ok(millis / 3_600_000) + }) + } + + fn minute(array: &IntervalDayTimeArray) -> Result { + interval_day_time_op(array, |v| { + let (_, millis) = IntervalDayTimeType::to_parts(v); + Ok(millis % 3_600_000 / 60_000) + }) + } + + fn second(array: &IntervalDayTimeArray) -> Result { + // NOTE: this technically should return Float64 with millis in the decimal part, + // but we are returning Int32 for compatibility with the original implementation. + interval_day_time_op(array, |v| { + let (_, millis) = IntervalDayTimeType::to_parts(v); + Ok(millis % 60_000 / 1_000) + }) + } +} + +impl DatePartable for IntervalMonthDayNanoType { + fn year(array: &IntervalMonthDayNanoArray) -> Result { + interval_month_day_nano_op(array, |v| { + let (months, _, _) = IntervalMonthDayNanoType::to_parts(v); + Ok(months / 12) + }) + } + + fn quarter(array: &IntervalMonthDayNanoArray) -> Result { + interval_month_day_nano_op(array, |v| { + let (months, _, _) = IntervalMonthDayNanoType::to_parts(v); + Ok(months % 12 / 3 + 1) + }) + } + + fn month(array: &IntervalMonthDayNanoArray) -> Result { + interval_month_day_nano_op(array, |v| { + let (months, _, _) = IntervalMonthDayNanoType::to_parts(v); + Ok(months % 12) + }) + } + + fn day(array: &IntervalMonthDayNanoArray) -> Result { + interval_month_day_nano_op(array, |v| { + let (_, days, _) = IntervalMonthDayNanoType::to_parts(v); + Ok(days) + }) + } + + fn hour(array: &IntervalMonthDayNanoArray) -> Result { + interval_month_day_nano_op(array, |v| { + let (_, _, nanos) = IntervalMonthDayNanoType::to_parts(v); + (nanos / 3_600_000_000_000).try_into().map_err(|_| { + ArrowError::ComputeError("Unable to convert i64 nanos to i32".to_string()) + }) + }) + } + + fn minute(array: &IntervalMonthDayNanoArray) -> Result { + interval_month_day_nano_op(array, |v| { + let (_, _, nanos) = IntervalMonthDayNanoType::to_parts(v); + (nanos % 3_600_000_000_000 / 60_000_000_000) + .try_into() + .map_err(|_| { + ArrowError::ComputeError( + "Unable to convert i64 nanos to i32".to_string(), + ) + }) + }) + } + + fn second(array: &IntervalMonthDayNanoArray) -> Result { + // NOTE: this technically should return Float64 with millis in the decimal part, + // but we are returning Int32 for compatibility with the original implementation. + interval_month_day_nano_op(array, |v| { + let (_, _, nanos) = IntervalMonthDayNanoType::to_parts(v); + (nanos % 60_000_000_000 / 1_000_000_000) + .try_into() + .map_err(|_| { + ArrowError::ComputeError( + "Unable to convert i64 nanos to i32".to_string(), + ) + }) + }) + } +} diff --git a/datafusion/expr/src/function.rs b/datafusion/expr/src/function.rs index 1f1a8ab8cec1..450403815e26 100644 --- a/datafusion/expr/src/function.rs +++ b/datafusion/expr/src/function.rs @@ -123,14 +123,7 @@ pub fn return_type( } BuiltinScalarFunction::Concat => Ok(DataType::Utf8), BuiltinScalarFunction::ConcatWithSeparator => Ok(DataType::Utf8), - BuiltinScalarFunction::DatePart => { - match &input_expr_types[1] { - // FIXME: DatePart should *always* return a numeric but this might break things - // so since interval wasn't supported in the first place, this is safe - DataType::Interval(_) => Ok(DataType::Float64), - _ => Ok(DataType::Int32), - } - } + BuiltinScalarFunction::DatePart => Ok(DataType::Float64), BuiltinScalarFunction::DateTrunc => { Ok(DataType::Timestamp(TimeUnit::Nanosecond, None)) } @@ -442,6 +435,14 @@ pub fn signature(fun: &BuiltinScalarFunction) -> Signature { DataType::Utf8, DataType::Timestamp(TimeUnit::Nanosecond, None), ]), + TypeSignature::Exact(vec![ + DataType::Utf8, + DataType::Interval(IntervalUnit::YearMonth), + ]), + TypeSignature::Exact(vec![ + DataType::Utf8, + DataType::Interval(IntervalUnit::DayTime), + ]), TypeSignature::Exact(vec![ DataType::Utf8, DataType::Interval(IntervalUnit::MonthDayNano), diff --git a/datafusion/physical-expr/src/datetime_expressions.rs b/datafusion/physical-expr/src/datetime_expressions.rs index 0b2190d00785..618ffd6c8a31 100644 --- a/datafusion/physical-expr/src/datetime_expressions.rs +++ b/datafusion/physical-expr/src/datetime_expressions.rs @@ -636,34 +636,67 @@ pub fn date_part(args: &[ColumnarValue]) -> Result { }; let arr = match date_part.to_lowercase().as_str() { - "doy" => { - extract_date_part!(array, date_part, cube_ext::temporal::doy, DataType::Int32) - } - "dow" => { - extract_date_part!(array, date_part, cube_ext::temporal::dow, DataType::Int32) - } - "year" => extract_date_part!(array, date_part, temporal::year, DataType::Int32), - "quarter" => { - extract_date_part!(array, date_part, temporal::quarter, DataType::Int32) - } - "month" => extract_date_part!(array, date_part, temporal::month, DataType::Int32), - "week" => extract_date_part!(array, date_part, temporal::week, DataType::Int32), - "day" => extract_date_part!(array, date_part, temporal::day, DataType::Int32), - "hour" => extract_date_part!(array, date_part, temporal::hour, DataType::Int32), - "minute" => { - extract_date_part!(array, date_part, temporal::minute, DataType::Int32) - } - "second" => { - extract_date_part!(array, date_part, temporal::second, DataType::Int32) - } - "epoch" => { - extract_date_part_from_date_or_interval!( - array, - date_part, - cube_ext::temporal::epoch, - DataType::Float64 - ) - } + "doy" => extract_date_part!( + array, + date_part, + cube_ext::temporal::doy, + DataType::Float64 + ), + "dow" => extract_date_part!( + array, + date_part, + cube_ext::temporal::dow, + DataType::Float64 + ), + "year" => extract_date_part_from_date_or_interval!( + array, + date_part, + cube_ext::temporal::year, + DataType::Float64 + ), + "quarter" => extract_date_part_from_date_or_interval!( + array, + date_part, + cube_ext::temporal::quarter, + DataType::Float64 + ), + "month" => extract_date_part_from_date_or_interval!( + array, + date_part, + cube_ext::temporal::month, + DataType::Float64 + ), + "week" => extract_date_part!(array, date_part, temporal::week, DataType::Float64), + "day" => extract_date_part_from_date_or_interval!( + array, + date_part, + cube_ext::temporal::day, + DataType::Float64 + ), + "hour" => extract_date_part_from_date_or_interval!( + array, + date_part, + cube_ext::temporal::hour, + DataType::Float64 + ), + "minute" => extract_date_part_from_date_or_interval!( + array, + date_part, + cube_ext::temporal::minute, + DataType::Float64 + ), + "second" => extract_date_part_from_date_or_interval!( + array, + date_part, + cube_ext::temporal::second, + DataType::Float64 + ), + "epoch" => extract_date_part_from_date_or_interval!( + array, + date_part, + cube_ext::temporal::epoch, + DataType::Float64 + ), _ => Err(DataFusionError::Execution(format!( "Date part '{}' not supported", date_part