Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
103 changes: 103 additions & 0 deletions datafusion/spark/src/function/datetime/add_months.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

use std::any::Any;
use std::ops::Add;
use std::sync::Arc;

use arrow::datatypes::{DataType, Field, FieldRef, IntervalUnit};
use datafusion_common::utils::take_function_args;
use datafusion_common::{Result, internal_err};
use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyContext};
use datafusion_expr::{
Cast, ColumnarValue, Expr, ReturnFieldArgs, ScalarFunctionArgs, ScalarUDFImpl,
Signature, Volatility,
};

/// <https://spark.apache.org/docs/latest/api/sql/index.html#add_months>
#[derive(Debug, PartialEq, Eq, Hash)]
pub struct SparkAddMonths {
signature: Signature,
}

impl Default for SparkAddMonths {
fn default() -> Self {
Self::new()
}
}

impl SparkAddMonths {
pub fn new() -> Self {
Self {
signature: Signature::exact(
vec![DataType::Date32, DataType::Int32],
Volatility::Immutable,
),
}
}
}

impl ScalarUDFImpl for SparkAddMonths {
fn as_any(&self) -> &dyn Any {
self
}

fn name(&self) -> &str {
"add_months"
}

fn signature(&self) -> &Signature {
&self.signature
}

fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
internal_err!("return_field_from_args should be used instead")
}

fn return_field_from_args(&self, args: ReturnFieldArgs) -> Result<FieldRef> {
let nullable = args.arg_fields.iter().any(|f| f.is_nullable())
|| args
.scalar_arguments
.iter()
.any(|arg| matches!(arg, Some(sv) if sv.is_null()));
Comment on lines +72 to +76
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
let nullable = args.arg_fields.iter().any(|f| f.is_nullable())
|| args
.scalar_arguments
.iter()
.any(|arg| matches!(arg, Some(sv) if sv.is_null()));
let nullable = args.arg_fields.iter().any(|f| f.is_nullable());

If an argument is a scalar null then its corresponding field should already be nullable


Ok(Arc::new(Field::new(
self.name(),
DataType::Date32,
nullable,
)))
}

fn simplify(
&self,
args: Vec<Expr>,
_info: &SimplifyContext,
) -> Result<ExprSimplifyResult> {
let [date_arg, months_arg] = take_function_args("add_months", args)?;

Ok(ExprSimplifyResult::Simplified(date_arg.add(Expr::Cast(
Cast::new(
Box::new(months_arg),
DataType::Interval(IntervalUnit::YearMonth),
),
))))
}

fn invoke_with_args(&self, _args: ScalarFunctionArgs) -> Result<ColumnarValue> {
internal_err!("invoke should not be called on a simplified add_months() function")
}
}
8 changes: 8 additions & 0 deletions datafusion/spark/src/function/datetime/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
// specific language governing permissions and limitations
// under the License.

pub mod add_months;
pub mod date_add;
pub mod date_sub;
pub mod extract;
Expand All @@ -27,6 +28,7 @@ use datafusion_expr::ScalarUDF;
use datafusion_functions::make_udf_function;
use std::sync::Arc;

make_udf_function!(add_months::SparkAddMonths, add_months);
make_udf_function!(date_add::SparkDateAdd, date_add);
make_udf_function!(date_sub::SparkDateSub, date_sub);
make_udf_function!(extract::SparkHour, hour);
Expand All @@ -40,6 +42,11 @@ make_udf_function!(next_day::SparkNextDay, next_day);
pub mod expr_fn {
use datafusion_functions::export_functions;

export_functions!((
add_months,
"Returns the date that is months months after start. The function returns NULL if at least one of the input parameters is NULL.",
arg1 arg2
));
export_functions!((
date_add,
"Returns the date that is days days after start. The function returns NULL if at least one of the input parameters is NULL.",
Expand Down Expand Up @@ -87,6 +94,7 @@ pub mod expr_fn {

pub fn functions() -> Vec<Arc<ScalarUDF>> {
vec![
add_months(),
date_add(),
date_sub(),
hour(),
Expand Down
45 changes: 35 additions & 10 deletions datafusion/sqllogictest/test_files/spark/datetime/add_months.slt
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,38 @@
# specific language governing permissions and limitations
# under the License.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does spark have a behaviour for overflow/result date too large that we need to consider?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

saprk will error with ARITHMETIC_OVERFLOW. I tried adding the following test case SELECT add_months('2016-07-30'::date, 2147483647::int); and im getting External error: task 35 panicked with message "NaiveDate + Months out of range". So in a way we have the same behavior as spark but its not testable ?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh this is interesting; panicking is definitely not good, and I can reproduce that on main too:

DataFusion CLI v51.0.0
> select '2016-07-30'::date + arrow_cast(2147483647::int, 'Interval(YearMonth)');

thread 'main' (15797910) panicked at /Users/jeffrey/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/chrono-0.4.42/src/naive/date/mod.rs:2005:41:
`NaiveDate + Months` out of range
note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace

We should raise this as a separate issue as this should be erroring not panicking

Copy link
Contributor Author

@cht42 cht42 Jan 10, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

traced it back to https://github.com/chronotope/chrono/blob/1c0b8f011ab2f2e53c195df1866a1fb4c7fd193a/src/naive/date/mod.rs#L2034

0: __rustc::rust_begin_unwind
             at /rustc/ded5c06cf21d2b93bffd5d884aa6e96934ee4234/library/std/src/panicking.rs:698:5
   1: core::panicking::panic_fmt
             at /rustc/ded5c06cf21d2b93bffd5d884aa6e96934ee4234/library/core/src/panicking.rs:80:14
   2: core::panicking::panic_display
             at /rustc/ded5c06cf21d2b93bffd5d884aa6e96934ee4234/library/core/src/panicking.rs:264:5
   3: core::option::expect_failed
             at /rustc/ded5c06cf21d2b93bffd5d884aa6e96934ee4234/library/core/src/option.rs:2183:5
   4: core::option::Option<T>::expect
             at /Users/chuet/.rustup/toolchains/1.92.0-aarch64-apple-darwin/lib/rustlib/src/rust/library/core/src/option.rs:970:21
   5: <chrono::naive::date::NaiveDate as core::ops::arith::Add<chrono::month::Months>>::add
             at /Users/chuet/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/chrono-0.4.42/src/naive/date/mod.rs:2005:41
   6: arrow_array::delta::shift_months
             at /Users/chuet/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/arrow-array-57.1.0/src/delta.rs:36:30
   7: arrow_array::types::Date32Type::add_year_months
             at /Users/chuet/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/arrow-array-57.1.0/src/types.rs:934:25
   8: <arrow_array::types::Date32Type as arrow_arith::numeric::DateOp>::add_year_month
             at /Users/chuet/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/arrow-arith-57.1.0/src/numeric.rs:554:12
   9: arrow_arith::numeric::date_op::{{closure}}
             at /Users/chuet/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/arrow-arith-57.1.0/src/numeric.rs:775:47
  10: arrow_arith::arity::try_binary_no_nulls
             at /Users/chuet/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/arrow-arith-57.1.0/src/arity.rs:385:35
  11: arrow_arith::arity::try_binary
             at /Users/chuet/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/arrow-arith-57.1.0/src/arity.rs:274:9
  12: arrow_arith::numeric::date_op
             at /Users/chuet/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/arrow-arith-57.1.0/src/numeric.rs:295:46
  13: arrow_arith::numeric::arithmetic_op
             at /Users/chuet/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/arrow-arith-57.1.0/src/numeric.rs:249:24
  14: arrow_arith::numeric::add_wrapping
             at /Users/chuet/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/arrow-arith-57.1.0/src/numeric.rs:40:5
  15: core::ops::function::Fn::call
             at /Users/chuet/.rustup/toolchains/1.92.0-aarch64-apple-darwin/lib/rustlib/src/rust/library/core/src/ops/function.rs:79:5
  16: datafusion_physical_expr_common::datum::apply
             at /Users/chuet/Projects/foundry/datafusion/datafusion/physical-expr-common/src/datum.rs:51:25
  17: <datafusion_physical_expr::expressions::binary::BinaryExpr as datafusion_physical_expr_common::physical_expr::PhysicalExpr>::evaluate

# This file was originally created by a porting script from:
# https://github.com/lakehq/sail/tree/43b6ed8221de5c4c4adbedbb267ae1351158b43c/crates/sail-spark-connect/tests/gold_data/function
# This file is part of the implementation of the datafusion-spark function library.
# For more information, please see:
# https://github.com/apache/datafusion/issues/15914

## Original Query: SELECT add_months('2016-08-31', 1);
## PySpark 3.5.5 Result: {'add_months(2016-08-31, 1)': datetime.date(2016, 9, 30), 'typeof(add_months(2016-08-31, 1))': 'date', 'typeof(2016-08-31)': 'string', 'typeof(1)': 'int'}
#query
#SELECT add_months('2016-08-31'::string, 1::int);
query D
SELECT add_months('2016-07-30'::date, 1::int);
----
2016-08-30

query D
SELECT add_months('2016-07-30'::date, 0::int);
----
2016-07-30

query D
SELECT add_months('2016-07-30'::date, 10000::int);
----
2849-11-30

query D
SELECT add_months('2016-07-30'::date, -5::int);
----
2016-02-29

# Test with NULL values
query D
SELECT add_months(NULL::date, 1::int);
----
NULL

query D
SELECT add_months('2016-07-30'::date, NULL::int);
----
NULL

query D
SELECT add_months(NULL::date, NULL::int);
----
NULL
11 changes: 3 additions & 8 deletions datafusion/sqllogictest/test_files/spark/datetime/date_add.slt
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ SELECT date_add('2016-07-30'::date, arrow_cast(1, 'Int8'));
2016-07-31

query D
SELECT date_sub('2016-07-30'::date, 0::int);
SELECT date_add('2016-07-30'::date, 0::int);
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

caught this issue

----
2016-07-30

Expand All @@ -51,20 +51,15 @@ SELECT date_add('2016-07-30'::date, 2147483647::int)::int;
-2147466637

query I
SELECT date_sub('1969-01-01'::date, 2147483647::int)::int;
SELECT date_add('1969-01-01'::date, 2147483647::int)::int;
----
2147483284
2147483282

query D
SELECT date_add('2016-07-30'::date, 100000::int);
----
2290-05-15

query D
SELECT date_sub('2016-07-30'::date, 100000::int);
----
1742-10-15
Comment on lines -54 to -66
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe we can copy the date_sub ones into date_sub.slt?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

updated


# Test with negative day values (should subtract days)
query D
SELECT date_add('2016-07-30'::date, -5::int);
Expand Down