diff --git a/datafusion/functions/src/datetime/date_part.rs b/datafusion/functions/src/datetime/date_part.rs
index 375200d07280b..9e21a457a6219 100644
--- a/datafusion/functions/src/datetime/date_part.rs
+++ b/datafusion/functions/src/datetime/date_part.rs
@@ -56,8 +56,9 @@ use datafusion_macros::user_doc;
     argument(
         name = "part",
        description = r#"Part of the date to return. The following date parts are supported:
-
+
    - year
+    - isoyear (ISO 8601 week-numbering year)
    - quarter (emits value in inclusive range [1, 4] based on which quartile of the year the date is in)
    - month
    - week (week of the year)
@@ -215,6 +216,7 @@ impl ScalarUDFImpl for DatePartFunc {
         } else {
             // special cases that can be extracted (in postgres) but are not interval units
             match part_trim.to_lowercase().as_str() {
+                "isoyear" => date_part(array.as_ref(), DatePart::YearISO)?,
                 "qtr" | "quarter" => date_part(array.as_ref(), DatePart::Quarter)?,
                 "doy" => date_part(array.as_ref(), DatePart::DayOfYear)?,
                 "dow" => date_part(array.as_ref(), DatePart::DayOfWeekSunday0)?,
diff --git a/datafusion/spark/src/function/datetime/date_part.rs b/datafusion/spark/src/function/datetime/date_part.rs
new file mode 100644
index 0000000000000..e30a162ef42db
--- /dev/null
+++ b/datafusion/spark/src/function/datetime/date_part.rs
@@ -0,0 +1,139 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use arrow::datatypes::{DataType, Field, FieldRef};
+use datafusion_common::types::logical_date;
+use datafusion_common::{
+    Result, ScalarValue, internal_err, types::logical_string, utils::take_function_args,
+};
+use datafusion_expr::expr::ScalarFunction;
+use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyContext};
+use datafusion_expr::{
+    Coercion, ColumnarValue, Expr, ReturnFieldArgs, ScalarFunctionArgs, ScalarUDFImpl,
+    Signature, TypeSignature, TypeSignatureClass, Volatility,
+};
+use std::{any::Any, sync::Arc};
+
+/// Wrapper around the DataFusion `date_part` function that implements Spark
+/// semantics: day-of-week parts are returned 1-indexed instead of 0-indexed,
+/// and Spark-specific part aliases are accepted.
+#[derive(Debug, PartialEq, Eq, Hash)]
+pub struct SparkDatePart {
+    signature: Signature,
+    aliases: Vec<String>,
+}
+
+impl Default for SparkDatePart {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl SparkDatePart {
+    pub fn new() -> Self {
+        Self {
+            signature: Signature::one_of(
+                vec![
+                    TypeSignature::Coercible(vec![
+                        Coercion::new_exact(TypeSignatureClass::Native(logical_string())),
+                        Coercion::new_exact(TypeSignatureClass::Timestamp),
+                    ]),
+                    TypeSignature::Coercible(vec![
+                        Coercion::new_exact(TypeSignatureClass::Native(logical_string())),
+                        Coercion::new_exact(TypeSignatureClass::Native(logical_date())),
+                    ]),
+                ],
+                Volatility::Immutable,
+            ),
+            aliases: vec![String::from("datepart")],
+        }
+    }
+}
+
+impl ScalarUDFImpl for SparkDatePart {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    fn name(&self) -> &str {
+        "date_part"
+    }
+
+    fn aliases(&self) -> &[String] {
+        &self.aliases
+    }
+
+    fn signature(&self) -> &Signature {
+        &self.signature
+    }
+
+    fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
+        internal_err!("Use return_field_from_args in this case instead.")
+    }
+
+    fn return_field_from_args(&self, args: ReturnFieldArgs) -> Result<FieldRef> {
+        let nullable = args.arg_fields.iter().any(|f| f.is_nullable());
+
+        Ok(Arc::new(Field::new(self.name(), DataType::Int32, nullable)))
+    }
+
+    fn invoke_with_args(&self, _args: ScalarFunctionArgs) -> Result<ColumnarValue> {
+        internal_err!("spark date_part should have been simplified to standard date_part")
+    }
+
+    fn simplify(
+        &self,
+        args: Vec<Expr>,
+        _info: &SimplifyContext,
+    ) -> Result<ExprSimplifyResult> {
+        let [part_expr, date_expr] = take_function_args(self.name(), args)?;
+
+        let part = match part_expr.as_literal() {
+            Some(ScalarValue::Utf8(Some(v)))
+            | Some(ScalarValue::Utf8View(Some(v)))
+            | Some(ScalarValue::LargeUtf8(Some(v))) => v.to_lowercase(),
+            _ => {
+                return internal_err!(
+                    "First argument of `DATE_PART` must be non-null scalar Utf8"
+                );
+            }
+        };
+
+        // Map Spark-specific date part aliases to datafusion ones
+        let part = match part.as_str() {
+            "yearofweek" | "year_iso" => "isoyear",
+            "dayofweek" => "dow",
+            "dayofweek_iso" | "dow_iso" => "isodow",
+            other => other,
+        };
+
+        let part_expr = Expr::Literal(ScalarValue::new_utf8(part), None);
+
+        let date_part_expr = Expr::ScalarFunction(ScalarFunction::new_udf(
+            datafusion_functions::datetime::date_part(),
+            vec![part_expr, date_expr],
+        ));
+
+        match part {
+            // Add 1 for day-of-week parts to convert 0-indexed to 1-indexed
+            "dow" | "isodow" => Ok(ExprSimplifyResult::Simplified(
+                date_part_expr + Expr::Literal(ScalarValue::Int32(Some(1)), None),
+            )),
+            _ => Ok(ExprSimplifyResult::Simplified(date_part_expr)),
+        }
+    }
+}
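For illustration, here is a minimal sketch (not part of the patch) of the expression that `simplify` produces for a day-of-week extraction; `col` and `lit` are the usual `datafusion_expr` helpers:

```rust
use datafusion_expr::expr::ScalarFunction;
use datafusion_expr::{col, lit, Expr};

/// Spark's date_part('dayofweek', d) simplifies to the standard
/// DataFusion date_part('dow', d) + 1, converting the 0-indexed
/// day of week into Spark's 1-indexed convention.
fn rewritten_dayofweek() -> Expr {
    let standard = Expr::ScalarFunction(ScalarFunction::new_udf(
        datafusion_functions::datetime::date_part(),
        vec![lit("dow"), col("d")],
    ));
    standard + lit(1)
}
```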
diff --git a/datafusion/spark/src/function/datetime/mod.rs b/datafusion/spark/src/function/datetime/mod.rs
index 849aa20895990..99618320e1025 100644
--- a/datafusion/spark/src/function/datetime/mod.rs
+++ b/datafusion/spark/src/function/datetime/mod.rs
@@ -16,6 +16,7 @@
 // under the License.
 
 pub mod date_add;
+pub mod date_part;
 pub mod date_sub;
 pub mod extract;
 pub mod last_day;
@@ -36,6 +37,7 @@ make_udf_function!(last_day::SparkLastDay, last_day);
 make_udf_function!(make_dt_interval::SparkMakeDtInterval, make_dt_interval);
 make_udf_function!(make_interval::SparkMakeInterval, make_interval);
 make_udf_function!(next_day::SparkNextDay, next_day);
+make_udf_function!(date_part::SparkDatePart, date_part);
 
 pub mod expr_fn {
     use datafusion_functions::export_functions;
@@ -83,6 +85,11 @@ pub mod expr_fn {
         "Returns the first date which is later than start_date and named as indicated. The function returns NULL if at least one of the input parameters is NULL.",
         arg1 arg2
     ));
+    export_functions!((
+        date_part,
+        "Extracts a part of the date or time from a date, time, or timestamp expression.",
+        arg1 arg2
+    ));
 }
 
 pub fn functions() -> Vec<Arc<ScalarUDF>> {
@@ -96,5 +103,6 @@ pub fn functions() -> Vec<Arc<ScalarUDF>> {
         make_dt_interval(),
         make_interval(),
         next_day(),
+        date_part(),
     ]
 }
diff --git a/datafusion/spark/src/lib.rs b/datafusion/spark/src/lib.rs
index aad3ceed68ce3..6218f5bc7b30d 100644
--- a/datafusion/spark/src/lib.rs
+++ b/datafusion/spark/src/lib.rs
@@ -92,9 +92,28 @@
 //! let expr = sha2(col("my_data"), lit(256));
 //! ```
 //!
+//! # Example: using the Spark expression planner
+//!
+//! The [`planner::SparkFunctionPlanner`] provides Spark-compatible expression
+//! planning, such as mapping SQL `EXTRACT` expressions to Spark's `date_part`
+//! function. To use it, register it with your session context:
+//!
+//! ```ignore
+//! use std::sync::Arc;
+//! use datafusion::prelude::SessionContext;
+//! use datafusion_spark::planner::SparkFunctionPlanner;
+//!
+//! let mut ctx = SessionContext::new();
+//! // Register the Spark expression planner
+//! ctx.register_expr_planner(Arc::new(SparkFunctionPlanner))?;
+//! // Now EXTRACT expressions will use Spark semantics
+//! let df = ctx.sql("SELECT EXTRACT(YEAR FROM timestamp_col) FROM my_table").await?;
+//! ```
+//!
 //![`Expr`]: datafusion_expr::Expr
 
 pub mod function;
+pub mod planner;
 
 use datafusion_catalog::TableFunction;
 use datafusion_common::Result;
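A quick sketch (hypothetical snippet, not in the patch) of the `expr_fn::date_part` helper that the `mod.rs` change exports; the module path assumes the re-exports added by this PR:

```rust
use datafusion_expr::{col, lit, Expr};
use datafusion_spark::function::datetime::expr_fn::date_part;

/// Builds Spark's date_part('year', <column>); during expression
/// simplification this is rewritten to the standard DataFusion
/// date_part (plus the +1 adjustment for day-of-week parts).
fn year_of(column: &str) -> Expr {
    date_part(lit("year"), col(column))
}
```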
diff --git a/datafusion/spark/src/planner.rs b/datafusion/spark/src/planner.rs
new file mode 100644
index 0000000000000..0947dd7ffc54c
--- /dev/null
+++ b/datafusion/spark/src/planner.rs
@@ -0,0 +1,34 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use datafusion_expr::Expr;
+use datafusion_expr::expr::ScalarFunction;
+use datafusion_expr::planner::{ExprPlanner, PlannerResult};
+
+#[derive(Default, Debug)]
+pub struct SparkFunctionPlanner;
+
+impl ExprPlanner for SparkFunctionPlanner {
+    fn plan_extract(
+        &self,
+        args: Vec<Expr>,
+    ) -> datafusion_common::Result<PlannerResult<Vec<Expr>>> {
+        Ok(PlannerResult::Planned(Expr::ScalarFunction(
+            ScalarFunction::new_udf(crate::function::datetime::date_part(), args),
+        )))
+    }
+}
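To see what the planner does, here is a small sketch (an illustration, with `col`/`lit` as assumed helpers) of calling `plan_extract` directly, the way the SQL frontend would for `EXTRACT(YEAR FROM d)`:

```rust
use datafusion_expr::planner::{ExprPlanner, PlannerResult};
use datafusion_expr::{col, lit};
use datafusion_spark::planner::SparkFunctionPlanner;

fn main() -> datafusion_common::Result<()> {
    // EXTRACT(YEAR FROM d) reaches the planner as [lit("YEAR"), col("d")].
    match SparkFunctionPlanner.plan_extract(vec![lit("YEAR"), col("d")])? {
        // The planner always wraps the args in the Spark date_part UDF.
        PlannerResult::Planned(expr) => println!("{expr}"),
        PlannerResult::Original(_) => unreachable!("this planner always plans"),
    }
    Ok(())
}
```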
diff --git a/datafusion/sqllogictest/src/test_context.rs b/datafusion/sqllogictest/src/test_context.rs
index d416dc1bcfbfc..feb1967710bc5 100644
--- a/datafusion/sqllogictest/src/test_context.rs
+++ b/datafusion/sqllogictest/src/test_context.rs
@@ -80,11 +80,18 @@
             // hardcode target partitions so plans are deterministic
             .with_target_partitions(4);
         let runtime = Arc::new(RuntimeEnv::default());
-        let mut state = SessionStateBuilder::new()
+
+        let mut state_builder = SessionStateBuilder::new()
             .with_config(config)
-            .with_runtime_env(runtime)
-            .with_default_features()
-            .build();
+            .with_runtime_env(runtime);
+
+        if is_spark_path(relative_path) {
+            state_builder = state_builder.with_expr_planners(vec![Arc::new(
+                datafusion_spark::planner::SparkFunctionPlanner,
+            )]);
+        }
+
+        let mut state = state_builder.with_default_features().build();
 
         if is_spark_path(relative_path) {
             info!("Registering Spark functions");
diff --git a/datafusion/sqllogictest/test_files/datetime/date_part.slt b/datafusion/sqllogictest/test_files/datetime/date_part.slt
index bee8602d80bd2..253e4cedbb944 100644
--- a/datafusion/sqllogictest/test_files/datetime/date_part.slt
+++ b/datafusion/sqllogictest/test_files/datetime/date_part.slt
@@ -81,6 +81,23 @@ SELECT date_part('year', ts_nano_no_tz), date_part('year', ts_nano_utc), date_pa
 2020 2020 2019 2020 2020 2019
 2020 2020 2019 2020 2020 2019
+# date_part (isoyear) with columns and explicit timestamp
+query IIIIII
+SELECT date_part('isoyear', ts_nano_no_tz), date_part('isoyear', ts_nano_utc), date_part('isoyear', ts_nano_eastern), date_part('isoyear', ts_milli_no_tz), date_part('isoyear', ts_milli_utc), date_part('isoyear', ts_milli_eastern) FROM source_ts;
+----
+2020 2020 2020 2020 2020 2020
+2020 2020 2020 2020 2020 2020
+2020 2020 2020 2020 2020 2020
+2020 2020 2020 2020 2020 2020
+2020 2020 2020 2020 2020 2020
+2020 2020 2020 2020 2020 2020
+2020 2020 2020 2020 2020 2020
+2020 2020 2020 2020 2020 2020
+2020 2020 2020 2020 2020 2020
+2020 2020 2020 2020 2020 2020
+2020 2020 2020 2020 2020 2020
+
+
 # date_part (month)
 query IIIIII
 SELECT date_part('month', ts_nano_no_tz), date_part('month', ts_nano_utc), date_part('month', ts_nano_eastern), date_part('month', ts_milli_no_tz), date_part('month', ts_milli_utc), date_part('month', ts_milli_eastern) FROM source_ts;
@@ -228,6 +245,26 @@ SELECT EXTRACT('year' FROM timestamp '2020-09-08T12:00:00+00:00')
 ----
 2020
+query I
+SELECT date_part('ISOYEAR', CAST('2000-01-01' AS DATE))
+----
+1999
+
+query I
+SELECT EXTRACT(isoyear FROM timestamp '2020-09-08T12:00:00+00:00')
+----
+2020
+
+query I
+SELECT EXTRACT("isoyear" FROM timestamp '2020-09-08T12:00:00+00:00')
+----
+2020
+
+query I
+SELECT EXTRACT('isoyear' FROM timestamp '2020-09-08T12:00:00+00:00')
+----
+2020
+
 query I
 SELECT date_part('QUARTER', CAST('2000-01-01' AS DATE))
 ----
 1
@@ -865,9 +902,15 @@ SELECT extract(month from arrow_cast('20 months', 'Interval(YearMonth)'))
 ----
 8
+query error DataFusion error: Arrow error: Compute error: YearISO does not support: Interval\(YearMonth\)
+SELECT extract(isoyear from arrow_cast('10 years', 'Interval(YearMonth)'))
+
 query error DataFusion error: Arrow error: Compute error: Year does not support: Interval\(DayTime\)
 SELECT extract(year from arrow_cast('10 days', 'Interval(DayTime)'))
+query error DataFusion error: Arrow error: Compute error: YearISO does not support: Interval\(DayTime\)
+SELECT extract(isoyear from arrow_cast('10 days', 'Interval(DayTime)'))
+
 query error DataFusion error: Arrow error: Compute error: Month does not support: Interval\(DayTime\)
 SELECT extract(month from arrow_cast('10 days', 'Interval(DayTime)'))
@@ -1011,6 +1054,9 @@ SELECT extract(month from arrow_cast(864000, 'Duration(Second)'))
 query error DataFusion error: Arrow error: Compute error: Year does not support: Duration\(s\)
 SELECT extract(year from arrow_cast(864000, 'Duration(Second)'))
+query error DataFusion error: Arrow error: Compute error: YearISO does not support: Duration\(s\)
+SELECT extract(isoyear from arrow_cast(864000, 'Duration(Second)'))
+
 query I
 SELECT extract(day from arrow_cast(NULL, 'Duration(Second)'))
 ----
 NULL
@@ -1023,6 +1069,11 @@ SELECT (date_part('year', now()) = EXTRACT(year FROM now()))
 ----
 true
+query B
+SELECT (date_part('isoyear', now()) = EXTRACT(isoyear FROM now()))
+----
+true
+
 query B
 SELECT (date_part('quarter', now()) = EXTRACT(quarter FROM now()))
 ----
 true
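As a cross-check of the `isoyear` expectations above (for example, `date_part('ISOYEAR', DATE '2000-01-01')` returning 1999), here is a small sketch using the chrono crate; chrono is an assumption for illustration only, since Arrow implements `YearISO` itself:

```rust
use chrono::{Datelike, NaiveDate};

fn main() {
    // 2000-01-01 is a Saturday that belongs to ISO week 52 of 1999,
    // so its ISO week-numbering year is 1999, not 2000.
    let d = NaiveDate::from_ymd_opt(2000, 1, 1).unwrap();
    assert_eq!(d.iso_week().year(), 1999);
    assert_eq!(d.iso_week().week(), 52);
}
```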
diff --git a/datafusion/sqllogictest/test_files/spark/datetime/date_part.slt b/datafusion/sqllogictest/test_files/spark/datetime/date_part.slt
index cd3271cdc7df8..48216bd551692 100644
--- a/datafusion/sqllogictest/test_files/spark/datetime/date_part.slt
+++ b/datafusion/sqllogictest/test_files/spark/datetime/date_part.slt
@@ -15,48 +15,262 @@
 # specific language governing permissions and limitations
 # under the License.
 
-# This file was originally created by a porting script from:
-# https://github.com/lakehq/sail/tree/43b6ed8221de5c4c4adbedbb267ae1351158b43c/crates/sail-spark-connect/tests/gold_data/function
-# This file is part of the implementation of the datafusion-spark function library.
-# For more information, please see:
-# https://github.com/apache/datafusion/issues/15914
-
-## Original Query: SELECT date_part('MINUTE', INTERVAL '123 23:55:59.002001' DAY TO SECOND);
-## PySpark 3.5.5 Result: {"date_part(MINUTE, INTERVAL '123 23:55:59.002001' DAY TO SECOND)": 55, "typeof(date_part(MINUTE, INTERVAL '123 23:55:59.002001' DAY TO SECOND))": 'tinyint', 'typeof(MINUTE)': 'string', "typeof(INTERVAL '123 23:55:59.002001' DAY TO SECOND)": 'interval day to second'}
-#query
-#SELECT date_part('MINUTE'::string, INTERVAL '123 23:55:59.002001' DAY TO SECOND::interval day to second);
-
-## Original Query: SELECT date_part('MONTH', INTERVAL '2021-11' YEAR TO MONTH);
-## PySpark 3.5.5 Result: {"date_part(MONTH, INTERVAL '2021-11' YEAR TO MONTH)": 11, "typeof(date_part(MONTH, INTERVAL '2021-11' YEAR TO MONTH))": 'tinyint', 'typeof(MONTH)': 'string', "typeof(INTERVAL '2021-11' YEAR TO MONTH)": 'interval year to month'}
-#query
-#SELECT date_part('MONTH'::string, INTERVAL '2021-11' YEAR TO MONTH::interval year to month);
-
-## Original Query: SELECT date_part('SECONDS', timestamp'2019-10-01 00:00:01.000001');
-## PySpark 3.5.5 Result: {"date_part(SECONDS, TIMESTAMP '2019-10-01 00:00:01.000001')": Decimal('1.000001'), "typeof(date_part(SECONDS, TIMESTAMP '2019-10-01 00:00:01.000001'))": 'decimal(8,6)', 'typeof(SECONDS)': 'string', "typeof(TIMESTAMP '2019-10-01 00:00:01.000001')": 'timestamp'}
-#query
-#SELECT date_part('SECONDS'::string, TIMESTAMP '2019-10-01 00:00:01.000001'::timestamp);
-
-## Original Query: SELECT date_part('YEAR', TIMESTAMP '2019-08-12 01:00:00.123456');
-## PySpark 3.5.5 Result: {"date_part(YEAR, TIMESTAMP '2019-08-12 01:00:00.123456')": 2019, "typeof(date_part(YEAR, TIMESTAMP '2019-08-12 01:00:00.123456'))": 'int', 'typeof(YEAR)': 'string', "typeof(TIMESTAMP '2019-08-12 01:00:00.123456')": 'timestamp'}
-#query
-#SELECT date_part('YEAR'::string, TIMESTAMP '2019-08-12 01:00:00.123456'::timestamp);
-
-## Original Query: SELECT date_part('days', interval 5 days 3 hours 7 minutes);
-## PySpark 3.5.5 Result: {"date_part(days, INTERVAL '5 03:07' DAY TO MINUTE)": 5, "typeof(date_part(days, INTERVAL '5 03:07' DAY TO MINUTE))": 'int', 'typeof(days)': 'string', "typeof(INTERVAL '5 03:07' DAY TO MINUTE)": 'interval day to minute'}
-#query
-#SELECT date_part('days'::string, INTERVAL '5 03:07' DAY TO MINUTE::interval day to minute);
-
-## Original Query: SELECT date_part('doy', DATE'2019-08-12');
-## PySpark 3.5.5 Result: {"date_part(doy, DATE '2019-08-12')": 224, "typeof(date_part(doy, DATE '2019-08-12'))": 'int', 'typeof(doy)': 'string', "typeof(DATE '2019-08-12')": 'date'}
-#query
-#SELECT date_part('doy'::string, DATE '2019-08-12'::date);
-
-## Original Query: SELECT date_part('seconds', interval 5 hours 30 seconds 1 milliseconds 1 microseconds);
-## PySpark 3.5.5 Result: {"date_part(seconds, INTERVAL '05:00:30.001001' HOUR TO SECOND)": Decimal('30.001001'), "typeof(date_part(seconds, INTERVAL '05:00:30.001001' HOUR TO SECOND))": 'decimal(8,6)', 'typeof(seconds)': 'string', "typeof(INTERVAL '05:00:30.001001' HOUR TO SECOND)": 'interval hour to second'}
-#query
-#SELECT date_part('seconds'::string, INTERVAL '05:00:30.001001' HOUR TO SECOND::interval hour to second);
-
-## Original Query: SELECT date_part('week', timestamp'2019-08-12 01:00:00.123456');
-## PySpark 3.5.5 Result: {"date_part(week, TIMESTAMP '2019-08-12 01:00:00.123456')": 33, "typeof(date_part(week, TIMESTAMP '2019-08-12 01:00:00.123456'))": 'int', 'typeof(week)': 'string', "typeof(TIMESTAMP '2019-08-12 01:00:00.123456')": 'timestamp'}
-#query
-#SELECT date_part('week'::string, TIMESTAMP '2019-08-12 01:00:00.123456'::timestamp);
01:00:00.123456')": 'timestamp'} -#query -#SELECT date_part('week'::string, TIMESTAMP '2019-08-12 01:00:00.123456'::timestamp); +# YEAR +query I +SELECT date_part('YEAR'::string, '2000-01-01'::date); +---- +2000 + +query I +SELECT date_part('YEARS'::string, '2000-01-01'::date); +---- +2000 + +query I +SELECT date_part('Y'::string, '2000-01-01'::date); +---- +2000 + +query I +SELECT date_part('YR'::string, '2000-01-01'::date); +---- +2000 + +query I +SELECT date_part('YRS'::string, '2000-01-01'::date); +---- +2000 + +# YEAROFWEEK +query I +SELECT date_part('YEAROFWEEK'::string, '2000-01-01'::date); +---- +1999 + +# QUARTER +query I +SELECT date_part('QUARTER'::string, '2000-01-01'::date); +---- +1 + +query I +SELECT date_part('QTR'::string, '2000-01-01'::date); +---- +1 + +# MONTH +query I +SELECT date_part('MONTH'::string, '2000-01-01'::date); +---- +1 + +query I +SELECT date_part('MON'::string, '2000-01-01'::date); +---- +1 + +query I +SELECT date_part('MONS'::string, '2000-01-01'::date); +---- +1 + +query I +SELECT date_part('MONTHS'::string, '2000-01-01'::date); +---- +1 + +# WEEK +query I +SELECT date_part('WEEK'::string, '2000-01-01'::date); +---- +52 + +query I +SELECT date_part('WEEKS'::string, '2000-01-01'::date); +---- +52 + +query I +SELECT date_part('W'::string, '2000-01-01'::date); +---- +52 + +# DAYS +query I +SELECT date_part('DAY'::string, '2000-01-01'::date); +---- +1 + +query I +SELECT date_part('D'::string, '2000-01-01'::date); +---- +1 + +query I +SELECT date_part('DAYS'::string, '2000-01-01'::date); +---- +1 + +# DAYOFWEEK +query I +SELECT date_part('DAYOFWEEK'::string, '2000-01-01'::date); +---- +7 + +query I +SELECT date_part('DOW'::string, '2000-01-01'::date); +---- +7 + +# DAYOFWEEK_ISO +query I +SELECT date_part('DAYOFWEEK_ISO'::string, '2000-01-01'::date); +---- +6 + +query I +SELECT date_part('DOW_ISO'::string, '2000-01-01'::date); +---- +6 + +# DOY +query I +SELECT date_part('DOY'::string, '2000-01-01'::date); +---- +1 + +# HOUR +query I +SELECT date_part('HOUR'::string, '2000-01-01 12:30:45'::timestamp); +---- +12 + +query I +SELECT date_part('H'::string, '2000-01-01 12:30:45'::timestamp); +---- +12 + +query I +SELECT date_part('HOURS'::string, '2000-01-01 12:30:45'::timestamp); +---- +12 + +query I +SELECT date_part('HR'::string, '2000-01-01 12:30:45'::timestamp); +---- +12 + +query I +SELECT date_part('HRS'::string, '2000-01-01 12:30:45'::timestamp); +---- +12 + +# MINUTE +query I +SELECT date_part('MINUTE'::string, '2000-01-01 12:30:45'::timestamp); +---- +30 + +query I +SELECT date_part('M'::string, '2000-01-01 12:30:45'::timestamp); +---- +30 + +query I +SELECT date_part('MIN'::string, '2000-01-01 12:30:45'::timestamp); +---- +30 + +query I +SELECT date_part('MINS'::string, '2000-01-01 12:30:45'::timestamp); +---- +30 + +query I +SELECT date_part('MINUTES'::string, '2000-01-01 12:30:45'::timestamp); +---- +30 + +# SECOND +query I +SELECT date_part('SECOND'::string, '2000-01-01 12:30:45'::timestamp); +---- +45 + +query I +SELECT date_part('S'::string, '2000-01-01 12:30:45'::timestamp); +---- +45 + +query I +SELECT date_part('SEC'::string, '2000-01-01 12:30:45'::timestamp); +---- +45 + +query I +SELECT date_part('SECONDS'::string, '2000-01-01 12:30:45'::timestamp); +---- +45 + +query I +SELECT date_part('SECS'::string, '2000-01-01 12:30:45'::timestamp); +---- +45 + +# NULL input +query I +SELECT date_part('year'::string, NULL::timestamp); +---- +NULL + +query error Internal error: First argument of `DATE_PART` must be non-null scalar Utf8 +SELECT 
+
+# Invalid part
+query error DataFusion error: Execution error: Date part 'test' not supported
+SELECT date_part('test'::string, '2000-01-01'::date);
+
+query I
+SELECT date_part('year', column1)
+FROM VALUES
+('2022-03-15'::date),
+('1999-12-31'::date),
+('2000-01-01'::date),
+(NULL::date);
+----
+2022
+1999
+2000
+NULL
+
+query I
+SELECT date_part('minutes', column1)
+FROM VALUES
+('2022-03-15 12:30:45'::timestamp),
+('1999-12-31 12:32:45'::timestamp),
+('2000-01-01 12:00:45'::timestamp),
+(NULL::timestamp);
+----
+30
+32
+0
+NULL
+
+# alias datepart
+query I
+SELECT datepart('YEAR'::string, '2000-01-01'::date);
+----
+2000
+
+query I
+SELECT datepart('year', column1)
+FROM VALUES
+('2022-03-15'::date),
+('1999-12-31'::date),
+('2000-01-01'::date),
+(NULL::date);
+----
+2022
+1999
+2000
+NULL
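For completeness, a sketch of running one of the queries above outside sqllogictest; this assumes a tokio runtime and the `functions()` registration added by this PR, and is an illustration rather than a supported entry point:

```rust
use datafusion::prelude::SessionContext;

#[tokio::main]
async fn main() -> datafusion::error::Result<()> {
    let ctx = SessionContext::new();
    // Register all Spark datetime functions, including date_part
    // and its datepart alias (overriding the built-in date_part).
    for f in datafusion_spark::function::datetime::functions() {
        ctx.register_udf(f.as_ref().clone());
    }
    let df = ctx.sql("SELECT datepart('YEAR', DATE '2000-01-01')").await?;
    df.show().await?;
    Ok(())
}
```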
diff --git a/datafusion/sqllogictest/test_files/spark/datetime/datepart.slt b/datafusion/sqllogictest/test_files/spark/datetime/datepart.slt
deleted file mode 100644
index b2dd0089c2823..0000000000000
--- a/datafusion/sqllogictest/test_files/spark/datetime/datepart.slt
+++ /dev/null
@@ -1,62 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-
-# http://www.apache.org/licenses/LICENSE-2.0
-
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# This file was originally created by a porting script from:
-# https://github.com/lakehq/sail/tree/43b6ed8221de5c4c4adbedbb267ae1351158b43c/crates/sail-spark-connect/tests/gold_data/function
-# This file is part of the implementation of the datafusion-spark function library.
-# For more information, please see:
-# https://github.com/apache/datafusion/issues/15914
-
-## Original Query: SELECT datepart('MINUTE', INTERVAL '123 23:55:59.002001' DAY TO SECOND);
-## PySpark 3.5.5 Result: {"datepart(MINUTE FROM INTERVAL '123 23:55:59.002001' DAY TO SECOND)": 55, "typeof(datepart(MINUTE FROM INTERVAL '123 23:55:59.002001' DAY TO SECOND))": 'tinyint', 'typeof(MINUTE)': 'string', "typeof(INTERVAL '123 23:55:59.002001' DAY TO SECOND)": 'interval day to second'}
-#query
-#SELECT datepart('MINUTE'::string, INTERVAL '123 23:55:59.002001' DAY TO SECOND::interval day to second);
-
-## Original Query: SELECT datepart('MONTH', INTERVAL '2021-11' YEAR TO MONTH);
-## PySpark 3.5.5 Result: {"datepart(MONTH FROM INTERVAL '2021-11' YEAR TO MONTH)": 11, "typeof(datepart(MONTH FROM INTERVAL '2021-11' YEAR TO MONTH))": 'tinyint', 'typeof(MONTH)': 'string', "typeof(INTERVAL '2021-11' YEAR TO MONTH)": 'interval year to month'}
-#query
-#SELECT datepart('MONTH'::string, INTERVAL '2021-11' YEAR TO MONTH::interval year to month);
-
-## Original Query: SELECT datepart('SECONDS', timestamp'2019-10-01 00:00:01.000001');
-## PySpark 3.5.5 Result: {"datepart(SECONDS FROM TIMESTAMP '2019-10-01 00:00:01.000001')": Decimal('1.000001'), "typeof(datepart(SECONDS FROM TIMESTAMP '2019-10-01 00:00:01.000001'))": 'decimal(8,6)', 'typeof(SECONDS)': 'string', "typeof(TIMESTAMP '2019-10-01 00:00:01.000001')": 'timestamp'}
-#query
-#SELECT datepart('SECONDS'::string, TIMESTAMP '2019-10-01 00:00:01.000001'::timestamp);
-
-## Original Query: SELECT datepart('YEAR', TIMESTAMP '2019-08-12 01:00:00.123456');
-## PySpark 3.5.5 Result: {"datepart(YEAR FROM TIMESTAMP '2019-08-12 01:00:00.123456')": 2019, "typeof(datepart(YEAR FROM TIMESTAMP '2019-08-12 01:00:00.123456'))": 'int', 'typeof(YEAR)': 'string', "typeof(TIMESTAMP '2019-08-12 01:00:00.123456')": 'timestamp'}
-#query
-#SELECT datepart('YEAR'::string, TIMESTAMP '2019-08-12 01:00:00.123456'::timestamp);
-
-## Original Query: SELECT datepart('days', interval 5 days 3 hours 7 minutes);
-## PySpark 3.5.5 Result: {"datepart(days FROM INTERVAL '5 03:07' DAY TO MINUTE)": 5, "typeof(datepart(days FROM INTERVAL '5 03:07' DAY TO MINUTE))": 'int', 'typeof(days)': 'string', "typeof(INTERVAL '5 03:07' DAY TO MINUTE)": 'interval day to minute'}
-#query
-#SELECT datepart('days'::string, INTERVAL '5 03:07' DAY TO MINUTE::interval day to minute);
-
-## Original Query: SELECT datepart('doy', DATE'2019-08-12');
-## PySpark 3.5.5 Result: {"datepart(doy FROM DATE '2019-08-12')": 224, "typeof(datepart(doy FROM DATE '2019-08-12'))": 'int', 'typeof(doy)': 'string', "typeof(DATE '2019-08-12')": 'date'}
-#query
-#SELECT datepart('doy'::string, DATE '2019-08-12'::date);
-
-## Original Query: SELECT datepart('seconds', interval 5 hours 30 seconds 1 milliseconds 1 microseconds);
-## PySpark 3.5.5 Result: {"datepart(seconds FROM INTERVAL '05:00:30.001001' HOUR TO SECOND)": Decimal('30.001001'), "typeof(datepart(seconds FROM INTERVAL '05:00:30.001001' HOUR TO SECOND))": 'decimal(8,6)', 'typeof(seconds)': 'string', "typeof(INTERVAL '05:00:30.001001' HOUR TO SECOND)": 'interval hour to second'}
-#query
-#SELECT datepart('seconds'::string, INTERVAL '05:00:30.001001' HOUR TO SECOND::interval hour to second);
-
-## Original Query: SELECT datepart('week', timestamp'2019-08-12 01:00:00.123456');
-## PySpark 3.5.5 Result: {"datepart(week FROM TIMESTAMP '2019-08-12 01:00:00.123456')": 33, "typeof(datepart(week FROM TIMESTAMP '2019-08-12 01:00:00.123456'))": 'int', 'typeof(week)': 'string', "typeof(TIMESTAMP '2019-08-12 01:00:00.123456')": 'timestamp'}
"typeof(TIMESTAMP '2019-08-12 01:00:00.123456')": 'timestamp'} -#query -#SELECT datepart('week'::string, TIMESTAMP '2019-08-12 01:00:00.123456'::timestamp);