diff --git a/datafusion/spark/src/function/datetime/date_trunc.rs b/datafusion/spark/src/function/datetime/date_trunc.rs new file mode 100644 index 0000000000000..880eb9a1a4786 --- /dev/null +++ b/datafusion/spark/src/function/datetime/date_trunc.rs @@ -0,0 +1,167 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::any::Any; +use std::sync::Arc; + +use arrow::datatypes::{DataType, Field, FieldRef, TimeUnit}; +use datafusion_common::types::{NativeType, logical_string}; +use datafusion_common::utils::take_function_args; +use datafusion_common::{Result, ScalarValue, internal_err, plan_err}; +use datafusion_expr::expr::ScalarFunction; +use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyContext}; +use datafusion_expr::{ + Coercion, ColumnarValue, Expr, ExprSchemable, ReturnFieldArgs, ScalarFunctionArgs, + ScalarUDFImpl, Signature, TypeSignatureClass, Volatility, +}; + +/// Spark date_trunc supports extra format aliases. +/// It also handles timestamps with timezones by converting to session timezone first. +/// +#[derive(Debug, PartialEq, Eq, Hash)] +pub struct SparkDateTrunc { + signature: Signature, +} + +impl Default for SparkDateTrunc { + fn default() -> Self { + Self::new() + } +} + +impl SparkDateTrunc { + pub fn new() -> Self { + Self { + signature: Signature::coercible( + vec![ + Coercion::new_exact(TypeSignatureClass::Native(logical_string())), + Coercion::new_implicit( + TypeSignatureClass::Timestamp, + vec![TypeSignatureClass::Native(logical_string())], + NativeType::Timestamp(TimeUnit::Microsecond, None), + ), + ], + Volatility::Immutable, + ), + } + } +} + +impl ScalarUDFImpl for SparkDateTrunc { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "date_trunc" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, _arg_types: &[DataType]) -> Result { + internal_err!("return_field_from_args should be used instead") + } + + fn return_field_from_args(&self, args: ReturnFieldArgs) -> Result { + let nullable = args.arg_fields.iter().any(|f| f.is_nullable()); + + Ok(Arc::new(Field::new( + self.name(), + DataType::Timestamp(TimeUnit::Microsecond, None), + nullable, + ))) + } + + fn invoke_with_args(&self, _args: ScalarFunctionArgs) -> Result { + internal_err!( + "spark date_trunc should have been simplified to standard date_trunc" + ) + } + + fn simplify( + &self, + args: Vec, + info: &SimplifyContext, + ) -> Result { + let [fmt_expr, ts_expr] = take_function_args(self.name(), args)?; + + let fmt = match fmt_expr.as_literal() { + Some(ScalarValue::Utf8(Some(v))) + | Some(ScalarValue::Utf8View(Some(v))) + | Some(ScalarValue::LargeUtf8(Some(v))) => v.to_lowercase(), + _ => { + return plan_err!( + "First argument of `DATE_TRUNC` must be non-null scalar Utf8" + ); + } + }; + + // Map Spark-specific fmt aliases to datafusion ones + let fmt = match fmt.as_str() { + "yy" | "yyyy" => "year", + "mm" | "mon" => "month", + "dd" => "day", + other => other, + }; + + let session_tz = info.config_options().execution.time_zone.clone(); + let ts_type = ts_expr.get_type(info.schema())?; + + // Spark interprets timestamps in the session timezone before truncating, + // then returns a timestamp without timezone at microsecond precision. + // See: https://github.com/apache/spark/blob/f310f4fcc95580a6824bc7d22b76006f79b8804a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala#L492 + // + // For sub-second truncations (second, millisecond, microsecond), timezone + // adjustment is unnecessary since timezone offsets are whole seconds. + let ts_expr = match (&ts_type, fmt) { + // Sub-second truncations don't need timezone adjustment + (_, "second" | "millisecond" | "microsecond") => ts_expr, + + // Timestamp with timezone: convert to local time (applying session tz if needed) + (DataType::Timestamp(_, Some(_)), _) => { + let ts_with_tz = match &session_tz { + Some(tz) => ts_expr.cast_to( + &DataType::Timestamp( + TimeUnit::Microsecond, + Some(Arc::from(tz.as_str())), + ), + info.schema(), + )?, + None => ts_expr, + }; + Expr::ScalarFunction(ScalarFunction::new_udf( + datafusion_functions::datetime::to_local_time(), + vec![ts_with_tz], + )) + } + + // Timestamp without timezone: use as-is + _ => ts_expr, + }; + + let fmt_expr = Expr::Literal(ScalarValue::new_utf8(fmt), None); + + Ok(ExprSimplifyResult::Simplified(Expr::ScalarFunction( + ScalarFunction::new_udf( + datafusion_functions::datetime::date_trunc(), + vec![fmt_expr, ts_expr], + ), + ))) + } +} diff --git a/datafusion/spark/src/function/datetime/mod.rs b/datafusion/spark/src/function/datetime/mod.rs index 849aa20895990..e20dfef45ef24 100644 --- a/datafusion/spark/src/function/datetime/mod.rs +++ b/datafusion/spark/src/function/datetime/mod.rs @@ -17,11 +17,14 @@ pub mod date_add; pub mod date_sub; +pub mod date_trunc; pub mod extract; pub mod last_day; pub mod make_dt_interval; pub mod make_interval; pub mod next_day; +pub mod time_trunc; +pub mod trunc; use datafusion_expr::ScalarUDF; use datafusion_functions::make_udf_function; @@ -36,6 +39,9 @@ make_udf_function!(last_day::SparkLastDay, last_day); make_udf_function!(make_dt_interval::SparkMakeDtInterval, make_dt_interval); make_udf_function!(make_interval::SparkMakeInterval, make_interval); make_udf_function!(next_day::SparkNextDay, next_day); +make_udf_function!(date_trunc::SparkDateTrunc, date_trunc); +make_udf_function!(time_trunc::SparkTimeTrunc, time_trunc); +make_udf_function!(trunc::SparkTrunc, trunc); pub mod expr_fn { use datafusion_functions::export_functions; @@ -83,6 +89,21 @@ pub mod expr_fn { "Returns the first date which is later than start_date and named as indicated. The function returns NULL if at least one of the input parameters is NULL.", arg1 arg2 )); + export_functions!(( + date_trunc, + "Truncates a timestamp `ts` to the unit specified by the format `fmt`.", + fmt ts + )); + export_functions!(( + time_trunc, + "Truncates a time `t` to the unit specified by the format `fmt`.", + fmt t + )); + export_functions!(( + trunc, + "Truncates a date `dt` to the unit specified by the format `fmt`.", + dt fmt + )); } pub fn functions() -> Vec> { @@ -96,5 +117,8 @@ pub fn functions() -> Vec> { make_dt_interval(), make_interval(), next_day(), + date_trunc(), + time_trunc(), + trunc(), ] } diff --git a/datafusion/spark/src/function/datetime/time_trunc.rs b/datafusion/spark/src/function/datetime/time_trunc.rs new file mode 100644 index 0000000000000..718502a05ee6d --- /dev/null +++ b/datafusion/spark/src/function/datetime/time_trunc.rs @@ -0,0 +1,122 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::any::Any; +use std::sync::Arc; + +use arrow::datatypes::{DataType, Field, FieldRef}; +use datafusion_common::types::logical_string; +use datafusion_common::{Result, ScalarValue, internal_err, plan_err}; +use datafusion_expr::expr::ScalarFunction; +use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyContext}; +use datafusion_expr::{ + Coercion, ColumnarValue, Expr, ReturnFieldArgs, ScalarFunctionArgs, ScalarUDFImpl, + Signature, TypeSignatureClass, Volatility, +}; + +/// Spark time_trunc function only handles time inputs. +/// +#[derive(Debug, PartialEq, Eq, Hash)] +pub struct SparkTimeTrunc { + signature: Signature, +} + +impl Default for SparkTimeTrunc { + fn default() -> Self { + Self::new() + } +} + +impl SparkTimeTrunc { + pub fn new() -> Self { + Self { + signature: Signature::coercible( + vec![ + Coercion::new_exact(TypeSignatureClass::Native(logical_string())), + Coercion::new_exact(TypeSignatureClass::Time), + ], + Volatility::Immutable, + ), + } + } +} + +impl ScalarUDFImpl for SparkTimeTrunc { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "time_trunc" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, _arg_types: &[DataType]) -> Result { + internal_err!("return_field_from_args should be used instead") + } + + fn return_field_from_args(&self, args: ReturnFieldArgs) -> Result { + let nullable = args.arg_fields.iter().any(|f| f.is_nullable()); + + Ok(Arc::new(Field::new( + self.name(), + args.arg_fields[1].data_type().clone(), + nullable, + ))) + } + + fn invoke_with_args(&self, _args: ScalarFunctionArgs) -> Result { + internal_err!( + "spark time_trunc should have been simplified to standard date_trunc" + ) + } + + fn simplify( + &self, + args: Vec, + _info: &SimplifyContext, + ) -> Result { + let fmt_expr = &args[0]; + + let fmt = match fmt_expr.as_literal() { + Some(ScalarValue::Utf8(Some(v))) + | Some(ScalarValue::Utf8View(Some(v))) + | Some(ScalarValue::LargeUtf8(Some(v))) => v.to_lowercase(), + _ => { + return plan_err!( + "First argument of `TIME_TRUNC` must be non-null scalar Utf8" + ); + } + }; + + if !matches!( + fmt.as_str(), + "hour" | "minute" | "second" | "millisecond" | "microsecond" + ) { + return plan_err!( + "The format argument of `TIME_TRUNC` must be one of: hour, minute, second, millisecond, microsecond" + ); + } + + Ok(ExprSimplifyResult::Simplified(Expr::ScalarFunction( + ScalarFunction::new_udf(datafusion_functions::datetime::date_trunc(), args), + ))) + } +} diff --git a/datafusion/spark/src/function/datetime/trunc.rs b/datafusion/spark/src/function/datetime/trunc.rs new file mode 100644 index 0000000000000..b584cc9a70d44 --- /dev/null +++ b/datafusion/spark/src/function/datetime/trunc.rs @@ -0,0 +1,143 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::any::Any; +use std::sync::Arc; + +use arrow::datatypes::{DataType, Field, FieldRef, TimeUnit}; +use datafusion_common::types::{NativeType, logical_date, logical_string}; +use datafusion_common::utils::take_function_args; +use datafusion_common::{Result, ScalarValue, internal_err, plan_err}; +use datafusion_expr::expr::ScalarFunction; +use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyContext}; +use datafusion_expr::{ + Coercion, ColumnarValue, Expr, ExprSchemable, ReturnFieldArgs, ScalarFunctionArgs, + ScalarUDFImpl, Signature, TypeSignatureClass, Volatility, +}; + +/// Spark trunc supports date inputs only and extra format aliases. +/// Also spark trunc's argument order is (date, format). +/// +#[derive(Debug, PartialEq, Eq, Hash)] +pub struct SparkTrunc { + signature: Signature, +} + +impl Default for SparkTrunc { + fn default() -> Self { + Self::new() + } +} + +impl SparkTrunc { + pub fn new() -> Self { + Self { + signature: Signature::coercible( + vec![ + Coercion::new_implicit( + TypeSignatureClass::Native(logical_date()), + vec![TypeSignatureClass::Native(logical_string())], + NativeType::Date, + ), + Coercion::new_exact(TypeSignatureClass::Native(logical_string())), + ], + Volatility::Immutable, + ), + } + } +} + +impl ScalarUDFImpl for SparkTrunc { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "trunc" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, _arg_types: &[DataType]) -> Result { + internal_err!("return_field_from_args should be used instead") + } + + fn return_field_from_args(&self, args: ReturnFieldArgs) -> Result { + let nullable = args.arg_fields.iter().any(|f| f.is_nullable()); + + Ok(Arc::new(Field::new( + self.name(), + args.arg_fields[0].data_type().clone(), + nullable, + ))) + } + + fn invoke_with_args(&self, _args: ScalarFunctionArgs) -> Result { + internal_err!("spark trunc should have been simplified to standard date_trunc") + } + + fn simplify( + &self, + args: Vec, + info: &SimplifyContext, + ) -> Result { + let [dt_expr, fmt_expr] = take_function_args(self.name(), args)?; + + let fmt = match fmt_expr.as_literal() { + Some(ScalarValue::Utf8(Some(v))) + | Some(ScalarValue::Utf8View(Some(v))) + | Some(ScalarValue::LargeUtf8(Some(v))) => v.to_lowercase(), + _ => { + return plan_err!( + "Second argument of `TRUNC` must be non-null scalar Utf8" + ); + } + }; + + // Map Spark-specific fmt aliases to datafusion ones + let fmt = match fmt.as_str() { + "yy" | "yyyy" => "year", + "mm" | "mon" => "month", + "year" | "month" | "day" | "week" | "quarter" => fmt.as_str(), + _ => { + return plan_err!( + "The format argument of `TRUNC` must be one of: year, yy, yyyy, month, mm, mon, day, week, quarter." + ); + } + }; + let return_type = dt_expr.get_type(info.schema())?; + + let fmt_expr = Expr::Literal(ScalarValue::new_utf8(fmt), None); + + // Spark uses Dates so we need to cast to timestamp and back to work with datafusion's date_trunc + Ok(ExprSimplifyResult::Simplified( + Expr::ScalarFunction(ScalarFunction::new_udf( + datafusion_functions::datetime::date_trunc(), + vec![ + fmt_expr, + dt_expr.cast_to( + &DataType::Timestamp(TimeUnit::Nanosecond, None), + info.schema(), + )?, + ], + )) + .cast_to(&return_type, info.schema())?, + )) + } +} diff --git a/datafusion/sqllogictest/test_files/spark/datetime/date_trunc.slt b/datafusion/sqllogictest/test_files/spark/datetime/date_trunc.slt index 8a15254e6795e..705c320934fe8 100644 --- a/datafusion/sqllogictest/test_files/spark/datetime/date_trunc.slt +++ b/datafusion/sqllogictest/test_files/spark/datetime/date_trunc.slt @@ -15,33 +15,150 @@ # specific language governing permissions and limitations # under the License. -# This file was originally created by a porting script from: -# https://github.com/lakehq/sail/tree/43b6ed8221de5c4c4adbedbb267ae1351158b43c/crates/sail-spark-connect/tests/gold_data/function -# This file is part of the implementation of the datafusion-spark function library. -# For more information, please see: -# https://github.com/apache/datafusion/issues/15914 - -## Original Query: SELECT date_trunc('DD', '2015-03-05T09:32:05.359'); -## PySpark 3.5.5 Result: {'date_trunc(DD, 2015-03-05T09:32:05.359)': datetime.datetime(2015, 3, 5, 0, 0), 'typeof(date_trunc(DD, 2015-03-05T09:32:05.359))': 'timestamp', 'typeof(DD)': 'string', 'typeof(2015-03-05T09:32:05.359)': 'string'} -#query -#SELECT date_trunc('DD'::string, '2015-03-05T09:32:05.359'::string); - -## Original Query: SELECT date_trunc('HOUR', '2015-03-05T09:32:05.359'); -## PySpark 3.5.5 Result: {'date_trunc(HOUR, 2015-03-05T09:32:05.359)': datetime.datetime(2015, 3, 5, 9, 0), 'typeof(date_trunc(HOUR, 2015-03-05T09:32:05.359))': 'timestamp', 'typeof(HOUR)': 'string', 'typeof(2015-03-05T09:32:05.359)': 'string'} -#query -#SELECT date_trunc('HOUR'::string, '2015-03-05T09:32:05.359'::string); - -## Original Query: SELECT date_trunc('MILLISECOND', '2015-03-05T09:32:05.123456'); -## PySpark 3.5.5 Result: {'date_trunc(MILLISECOND, 2015-03-05T09:32:05.123456)': datetime.datetime(2015, 3, 5, 9, 32, 5, 123000), 'typeof(date_trunc(MILLISECOND, 2015-03-05T09:32:05.123456))': 'timestamp', 'typeof(MILLISECOND)': 'string', 'typeof(2015-03-05T09:32:05.123456)': 'string'} -#query -#SELECT date_trunc('MILLISECOND'::string, '2015-03-05T09:32:05.123456'::string); - -## Original Query: SELECT date_trunc('MM', '2015-03-05T09:32:05.359'); -## PySpark 3.5.5 Result: {'date_trunc(MM, 2015-03-05T09:32:05.359)': datetime.datetime(2015, 3, 1, 0, 0), 'typeof(date_trunc(MM, 2015-03-05T09:32:05.359))': 'timestamp', 'typeof(MM)': 'string', 'typeof(2015-03-05T09:32:05.359)': 'string'} -#query -#SELECT date_trunc('MM'::string, '2015-03-05T09:32:05.359'::string); - -## Original Query: SELECT date_trunc('YEAR', '2015-03-05T09:32:05.359'); -## PySpark 3.5.5 Result: {'date_trunc(YEAR, 2015-03-05T09:32:05.359)': datetime.datetime(2015, 1, 1, 0, 0), 'typeof(date_trunc(YEAR, 2015-03-05T09:32:05.359))': 'timestamp', 'typeof(YEAR)': 'string', 'typeof(2015-03-05T09:32:05.359)': 'string'} -#query -#SELECT date_trunc('YEAR'::string, '2015-03-05T09:32:05.359'::string); +# YEAR - truncate to first date of year, time zeroed +query P +SELECT date_trunc('YEAR', '2015-03-05T09:32:05.123456'::timestamp); +---- +2015-01-01T00:00:00 + +query P +SELECT date_trunc('YYYY', '2015-03-05T09:32:05.123456'::timestamp); +---- +2015-01-01T00:00:00 + +query P +SELECT date_trunc('YY', '2015-03-05T09:32:05.123456'::timestamp); +---- +2015-01-01T00:00:00 + +# QUARTER - truncate to first date of quarter, time zeroed +query P +SELECT date_trunc('QUARTER', '2015-05-05T09:32:05.123456'::timestamp); +---- +2015-04-01T00:00:00 + +# MONTH - truncate to first date of month, time zeroed +query P +SELECT date_trunc('MONTH', '2015-03-05T09:32:05.123456'::timestamp); +---- +2015-03-01T00:00:00 + +query P +SELECT date_trunc('MM', '2015-03-05T09:32:05.123456'::timestamp); +---- +2015-03-01T00:00:00 + +query P +SELECT date_trunc('MON', '2015-03-05T09:32:05.123456'::timestamp); +---- +2015-03-01T00:00:00 + +# WEEK - truncate to Monday of the week, time zeroed +query P +SELECT date_trunc('WEEK', '2015-03-05T09:32:05.123456'::timestamp); +---- +2015-03-02T00:00:00 + +# DAY - zero out time part +query P +SELECT date_trunc('DAY', '2015-03-05T09:32:05.123456'::timestamp); +---- +2015-03-05T00:00:00 + +query P +SELECT date_trunc('DD', '2015-03-05T09:32:05.123456'::timestamp); +---- +2015-03-05T00:00:00 + +# HOUR - zero out minute and second with fraction +query P +SELECT date_trunc('HOUR', '2015-03-05T09:32:05.123456'::timestamp); +---- +2015-03-05T09:00:00 + +# MINUTE - zero out second with fraction +query P +SELECT date_trunc('MINUTE', '2015-03-05T09:32:05.123456'::timestamp); +---- +2015-03-05T09:32:00 + +# SECOND - zero out fraction +query P +SELECT date_trunc('SECOND', '2015-03-05T09:32:05.123456'::timestamp); +---- +2015-03-05T09:32:05 + +# MILLISECOND - zero out microseconds +query P +SELECT date_trunc('MILLISECOND', '2015-03-05T09:32:05.123456'::timestamp); +---- +2015-03-05T09:32:05.123 + +# MICROSECOND - everything remains +query P +SELECT date_trunc('MICROSECOND', '2015-03-05T09:32:05.123456'::timestamp); +---- +2015-03-05T09:32:05.123456 + +query P +SELECT date_trunc('YEAR', column1) +FROM VALUES +('2015-03-05T09:32:05.123456'::timestamp), +('2020-11-15T22:45:30.654321'::timestamp), +('1999-07-20T14:20:10.000001'::timestamp), +(NULL::timestamp); +---- +2015-01-01T00:00:00 +2020-01-01T00:00:00 +1999-01-01T00:00:00 +NULL + +# String input +query P +SELECT date_trunc('YEAR', '2015-03-05T09:32:05.123456'); +---- +2015-01-01T00:00:00 + +# Null handling +query error DataFusion error: Optimizer rule 'simplify_expressions' failed\ncaused by\nError during planning: First argument of `DATE_TRUNC` must be non-null scalar Utf8 +SELECT date_trunc(NULL, '2015-03-05T09:32:05.123456'); + +query P +SELECT date_trunc('YEAR', NULL::timestamp); +---- +NULL + +# incorrect format +query error DataFusion error: Execution error: Unsupported date_trunc granularity: 'test'. Supported values are: microsecond, millisecond, second, minute, hour, day, week, month, quarter, year +SELECT date_trunc('test', '2015-03-05T09:32:05.123456'); + +# Timezone handling - Spark-compatible behavior +# Spark converts timestamps to session timezone before truncating for coarse granularities + +query P +SELECT date_trunc('DAY', arrow_cast(timestamp '2024-07-15T03:30:00', 'Timestamp(Microsecond, Some("UTC"))')); +---- +2024-07-15T00:00:00 + +query P +SELECT date_trunc('DAY', arrow_cast(timestamp '2024-07-15T03:30:00', 'Timestamp(Microsecond, None)')); +---- +2024-07-15T00:00:00 + +statement ok +SET datafusion.execution.time_zone = 'America/New_York'; + +# This timestamp is 03:30 UTC = 23:30 EDT (previous day) on July 14 +# With session timezone, truncation happens in America/New_York timezone +query P +SELECT date_trunc('DAY', arrow_cast(timestamp '2024-07-15T03:30:00', 'Timestamp(Microsecond, Some("UTC"))')); +---- +2024-07-14T00:00:00 + +query P +SELECT date_trunc('DAY', arrow_cast(timestamp '2024-07-15T03:30:00', 'Timestamp(Microsecond, None)')); +---- +2024-07-15T00:00:00 + +statement ok +RESET datafusion.execution.time_zone; diff --git a/datafusion/sqllogictest/test_files/spark/datetime/time_trunc.slt b/datafusion/sqllogictest/test_files/spark/datetime/time_trunc.slt new file mode 100644 index 0000000000000..f00c40f0a9371 --- /dev/null +++ b/datafusion/sqllogictest/test_files/spark/datetime/time_trunc.slt @@ -0,0 +1,74 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# HOUR - zero out minute and second with fraction +query D +SELECT time_trunc('HOUR', '09:32:05.123456'::time); +---- +09:00:00 + +# MINUTE - zero out second with fraction +query D +SELECT time_trunc('MINUTE', '09:32:05.123456'::time); +---- +09:32:00 + +# SECOND - zero out fraction +query D +SELECT time_trunc('SECOND', '09:32:05.123456'::time); +---- +09:32:05 + +# MILLISECOND - zero out microseconds +query D +SELECT time_trunc('MILLISECOND', '09:32:05.123456'::time); +---- +09:32:05.123 + +# MICROSECOND - everything remains +query D +SELECT time_trunc('MICROSECOND', '09:32:05.123456'::time); +---- +09:32:05.123456 + +query D +SELECT time_trunc('HOUR', column1) +FROM VALUES +('09:32:05.123456'::time), +('22:45:30.654321'::time), +('14:20:10.000001'::time), +(NULL::time); +---- +09:00:00 +22:00:00 +14:00:00 +NULL + + +# Null handling +query error DataFusion error: Optimizer rule 'simplify_expressions' failed\ncaused by\nError during planning: First argument of `TIME_TRUNC` must be non-null scalar Utf8 +SELECT time_trunc(NULL, '09:32:05.123456'::time); + +query D +SELECT time_trunc('HOUR', NULL::time); +---- +NULL + +# incorrect format +query error DataFusion error: Optimizer rule 'simplify_expressions' failed\ncaused by\nError during planning: The format argument of `TIME_TRUNC` must be one of: hour, minute, second, millisecond, microsecond +SELECT time_trunc('test', '09:32:05.123456'::time); + diff --git a/datafusion/sqllogictest/test_files/spark/datetime/trunc.slt b/datafusion/sqllogictest/test_files/spark/datetime/trunc.slt index a502e2f7f7b00..f6bf6b5751ed2 100644 --- a/datafusion/sqllogictest/test_files/spark/datetime/trunc.slt +++ b/datafusion/sqllogictest/test_files/spark/datetime/trunc.slt @@ -15,28 +15,79 @@ # specific language governing permissions and limitations # under the License. -# This file was originally created by a porting script from: -# https://github.com/lakehq/sail/tree/43b6ed8221de5c4c4adbedbb267ae1351158b43c/crates/sail-spark-connect/tests/gold_data/function -# This file is part of the implementation of the datafusion-spark function library. -# For more information, please see: -# https://github.com/apache/datafusion/issues/15914 - -## Original Query: SELECT trunc('2009-02-12', 'MM'); -## PySpark 3.5.5 Result: {'trunc(2009-02-12, MM)': datetime.date(2009, 2, 1), 'typeof(trunc(2009-02-12, MM))': 'date', 'typeof(2009-02-12)': 'string', 'typeof(MM)': 'string'} -#query -#SELECT trunc('2009-02-12'::string, 'MM'::string); - -## Original Query: SELECT trunc('2015-10-27', 'YEAR'); -## PySpark 3.5.5 Result: {'trunc(2015-10-27, YEAR)': datetime.date(2015, 1, 1), 'typeof(trunc(2015-10-27, YEAR))': 'date', 'typeof(2015-10-27)': 'string', 'typeof(YEAR)': 'string'} -#query -#SELECT trunc('2015-10-27'::string, 'YEAR'::string); - -## Original Query: SELECT trunc('2019-08-04', 'quarter'); -## PySpark 3.5.5 Result: {'trunc(2019-08-04, quarter)': datetime.date(2019, 7, 1), 'typeof(trunc(2019-08-04, quarter))': 'date', 'typeof(2019-08-04)': 'string', 'typeof(quarter)': 'string'} -#query -#SELECT trunc('2019-08-04'::string, 'quarter'::string); - -## Original Query: SELECT trunc('2019-08-04', 'week'); -## PySpark 3.5.5 Result: {'trunc(2019-08-04, week)': datetime.date(2019, 7, 29), 'typeof(trunc(2019-08-04, week))': 'date', 'typeof(2019-08-04)': 'string', 'typeof(week)': 'string'} -#query -#SELECT trunc('2019-08-04'::string, 'week'::string); +# YEAR - truncate to first date of year +query D +SELECT trunc('2009-02-12'::date, 'YEAR'::string); +---- +2009-01-01 + +query D +SELECT trunc('2009-02-12'::date, 'YYYY'::string); +---- +2009-01-01 + +query D +SELECT trunc('2009-02-12'::date, 'YY'::string); +---- +2009-01-01 + +# QUARTER - truncate to first date of quarter +query D +SELECT trunc('2009-02-12'::date, 'QUARTER'::string); +---- +2009-01-01 + +# MONTH - truncate to first date of month +query D +SELECT trunc('2009-02-12'::date, 'MONTH'::string); +---- +2009-02-01 + +query D +SELECT trunc('2009-02-12'::date, 'MM'::string); +---- +2009-02-01 + +query D +SELECT trunc('2009-02-12'::date, 'MON'::string); +---- +2009-02-01 + +# WEEK - truncate to Monday of the week +query D +SELECT trunc('2009-02-12'::date, 'WEEK'::string); +---- +2009-02-09 + +# string input +query D +SELECT trunc('2009-02-12'::string, 'YEAR'::string); +---- +2009-01-01 + +query D +SELECT trunc(column1, 'YEAR'::string) +FROM VALUES +('2009-02-12'::date), +('2000-02-12'::date), +('2042-02-12'::date), +(NULL::date); +---- +2009-01-01 +2000-01-01 +2042-01-01 +NULL + +# Null handling +query D +SELECT trunc(NULL::date, 'YEAR'::string); +---- +NULL + +query error DataFusion error: Optimizer rule 'simplify_expressions' failed\ncaused by\nError during planning: Second argument of `TRUNC` must be non-null scalar Utf8 +SELECT trunc('2009-02-12'::date, NULL::string); + +# incorrect format +query error DataFusion error: Optimizer rule 'simplify_expressions' failed\ncaused by\nError during planning: The format argument of `TRUNC` must be one of: year, yy, yyyy, month, mm, mon, day, week, quarter. +SELECT trunc('2009-02-12'::date, 'test'::string); +