Skip to content

Commit a2f02f0

Browse files
fix: Return Int for Date - Date instead of duration (#19563)
## Which issue does this PR close? <!-- We generally require a GitHub issue to be filed for all bug fixes and enhancements and this helps us generate change logs for our releases. You can link an issue to this PR using the GitHub syntax. For example `Closes #123` indicates that this PR will close issue #123. --> - Closes #19528. ## Rationale for this change The Date - Date currently returns duration which is not consistent with other databases. <!-- Why are you proposing this change? If this is already explained clearly in the issue then this section is not needed. Explaining clearly why changes are proposed helps reviewers understand your changes and offer better suggestions for fixes. --> ## What changes are included in this PR? - Made changes to so it return Int instead of the days duration - Added slt test <!-- There is no need to duplicate the description in the issue here but it is sometimes worth providing a summary of the individual changes in this PR. --> ## Are these changes tested? Yes <!-- We typically require tests for all PRs in order to: 1. Prevent the code from being accidentally broken by subsequent changes 2. Serve as another way to document the expected behavior of the code If tests are not included in your PR, please explain why (for example, are they covered by existing tests)? --> ## Are there any user-facing changes? <!-- If there are user-facing changes then we may require documentation to be updated before approving the PR. --> <!-- If there are any breaking changes to public APIs, please add the `api change` label. --> --------- Co-authored-by: Jeffrey Vo <[email protected]>
1 parent 924037e commit a2f02f0

File tree

4 files changed

+121
-16
lines changed

4 files changed

+121
-16
lines changed

datafusion/expr-common/src/type_coercion/binary.rs

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -260,8 +260,16 @@ impl<'a> BinaryTypeCoercer<'a> {
260260
)
261261
})
262262
}
263+
Minus if is_date_minus_date(lhs, rhs) => {
264+
return Ok(Signature {
265+
lhs: lhs.clone(),
266+
rhs: rhs.clone(),
267+
ret: Int64,
268+
});
269+
}
263270
Plus | Minus | Multiply | Divide | Modulo => {
264271
if let Ok(ret) = self.get_result(lhs, rhs) {
272+
265273
// Temporal arithmetic, e.g. Date32 + Interval
266274
Ok(Signature{
267275
lhs: lhs.clone(),
@@ -281,6 +289,7 @@ impl<'a> BinaryTypeCoercer<'a> {
281289
ret,
282290
})
283291
} else if let Some(coerced) = temporal_coercion_strict_timezone(lhs, rhs) {
292+
284293
// Temporal arithmetic by first coercing to a common time representation
285294
// e.g. Date32 - Timestamp
286295
let ret = self.get_result(&coerced, &coerced).map_err(|e| {
@@ -351,6 +360,15 @@ fn is_decimal(data_type: &DataType) -> bool {
351360
)
352361
}
353362

363+
/// Returns true if both operands are Date types (Date32 or Date64)
364+
/// Used to detect Date - Date operations which should return Int64 (days difference)
365+
fn is_date_minus_date(lhs: &DataType, rhs: &DataType) -> bool {
366+
matches!(
367+
(lhs, rhs),
368+
(DataType::Date32, DataType::Date32) | (DataType::Date64, DataType::Date64)
369+
)
370+
}
371+
354372
/// Coercion rules for mathematics operators between decimal and non-decimal types.
355373
fn math_decimal_coercion(
356374
lhs_type: &DataType,

datafusion/physical-expr/src/expressions/binary.rs

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ use arrow::datatypes::*;
3030
use arrow::error::ArrowError;
3131
use datafusion_common::cast::as_boolean_array;
3232
use datafusion_common::{Result, ScalarValue, internal_err, not_impl_err};
33+
3334
use datafusion_expr::binary::BinaryTypeCoercer;
3435
use datafusion_expr::interval_arithmetic::{Interval, apply_operator};
3536
use datafusion_expr::sort_properties::ExprProperties;
@@ -162,6 +163,94 @@ fn boolean_op(
162163
op(ll, rr).map(|t| Arc::new(t) as _)
163164
}
164165

166+
/// Returns true if both operands are Date types (Date32 or Date64)
167+
/// Used to detect Date - Date operations which should return Int64 (days difference)
168+
fn is_date_minus_date(lhs: &DataType, rhs: &DataType) -> bool {
169+
matches!(
170+
(lhs, rhs),
171+
(DataType::Date32, DataType::Date32) | (DataType::Date64, DataType::Date64)
172+
)
173+
}
174+
175+
/// Computes the difference between two dates and returns the result as Int64 (days)
176+
/// This aligns with PostgreSQL, DuckDB, and MySQL behavior where date - date returns an integer
177+
///
178+
/// Implementation: Uses Arrow's sub_wrapping to get Duration, then converts to Int64 days
179+
fn apply_date_subtraction(
180+
lhs: &ColumnarValue,
181+
rhs: &ColumnarValue,
182+
) -> Result<ColumnarValue> {
183+
use arrow::compute::kernels::numeric::sub_wrapping;
184+
185+
// Use Arrow's sub_wrapping to compute the Duration result
186+
let duration_result = apply(lhs, rhs, sub_wrapping)?;
187+
188+
// Convert Duration to Int64 (days)
189+
match duration_result {
190+
ColumnarValue::Array(array) => {
191+
let int64_array = duration_to_days(&array)?;
192+
Ok(ColumnarValue::Array(int64_array))
193+
}
194+
ColumnarValue::Scalar(scalar) => {
195+
// Convert scalar Duration to Int64 days
196+
let array = scalar.to_array_of_size(1)?;
197+
let int64_array = duration_to_days(&array)?;
198+
let int64_scalar = ScalarValue::try_from_array(int64_array.as_ref(), 0)?;
199+
Ok(ColumnarValue::Scalar(int64_scalar))
200+
}
201+
}
202+
}
203+
204+
/// Converts a Duration array to Int64 days
205+
/// Handles different Duration time units (Second, Millisecond, Microsecond, Nanosecond)
206+
fn duration_to_days(array: &ArrayRef) -> Result<ArrayRef> {
207+
use datafusion_common::cast::{
208+
as_duration_microsecond_array, as_duration_millisecond_array,
209+
as_duration_nanosecond_array, as_duration_second_array,
210+
};
211+
212+
const SECONDS_PER_DAY: i64 = 86_400;
213+
const MILLIS_PER_DAY: i64 = 86_400_000;
214+
const MICROS_PER_DAY: i64 = 86_400_000_000;
215+
const NANOS_PER_DAY: i64 = 86_400_000_000_000;
216+
217+
match array.data_type() {
218+
DataType::Duration(TimeUnit::Second) => {
219+
let duration_array = as_duration_second_array(array)?;
220+
let result: Int64Array = duration_array
221+
.iter()
222+
.map(|v| v.map(|val| val / SECONDS_PER_DAY))
223+
.collect();
224+
Ok(Arc::new(result))
225+
}
226+
DataType::Duration(TimeUnit::Millisecond) => {
227+
let duration_array = as_duration_millisecond_array(array)?;
228+
let result: Int64Array = duration_array
229+
.iter()
230+
.map(|v| v.map(|val| val / MILLIS_PER_DAY))
231+
.collect();
232+
Ok(Arc::new(result))
233+
}
234+
DataType::Duration(TimeUnit::Microsecond) => {
235+
let duration_array = as_duration_microsecond_array(array)?;
236+
let result: Int64Array = duration_array
237+
.iter()
238+
.map(|v| v.map(|val| val / MICROS_PER_DAY))
239+
.collect();
240+
Ok(Arc::new(result))
241+
}
242+
DataType::Duration(TimeUnit::Nanosecond) => {
243+
let duration_array = as_duration_nanosecond_array(array)?;
244+
let result: Int64Array = duration_array
245+
.iter()
246+
.map(|v| v.map(|val| val / NANOS_PER_DAY))
247+
.collect();
248+
Ok(Arc::new(result))
249+
}
250+
other => internal_err!("duration_to_days expected Duration type, got: {}", other),
251+
}
252+
}
253+
165254
impl PhysicalExpr for BinaryExpr {
166255
/// Return a reference to Any that can be used for downcasting
167256
fn as_any(&self) -> &dyn Any {
@@ -251,6 +340,11 @@ impl PhysicalExpr for BinaryExpr {
251340
match self.op {
252341
Operator::Plus if self.fail_on_overflow => return apply(&lhs, &rhs, add),
253342
Operator::Plus => return apply(&lhs, &rhs, add_wrapping),
343+
// Special case: Date - Date returns Int64 (days difference)
344+
// This aligns with PostgreSQL, DuckDB, and MySQL behavior
345+
Operator::Minus if is_date_minus_date(&left_data_type, &right_data_type) => {
346+
return apply_date_subtraction(&lhs, &rhs);
347+
}
254348
Operator::Minus if self.fail_on_overflow => return apply(&lhs, &rhs, sub),
255349
Operator::Minus => return apply(&lhs, &rhs, sub_wrapping),
256350
Operator::Multiply if self.fail_on_overflow => return apply(&lhs, &rhs, mul),
Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,15 @@
11
# date - date → integer
22
# Subtract dates, producing the number of days elapsed
33
# date '2001-10-01' - date '2001-09-28' → 3
4+
# This aligns with PostgreSQL, DuckDB, and MySQL behavior
5+
# Resolved by: https://github.com/apache/datafusion/issues/19528
46

5-
# note that datafusion returns Duration whereas postgres returns an int
6-
# Tracking issue: https://github.com/apache/datafusion/issues/19528
7-
8-
query ?
7+
query I
98
SELECT '2001-10-01'::date - '2001-09-28'::date
109
----
11-
3 days 0 hours 0 mins 0 secs
10+
3
1211

1312
query T
1413
SELECT arrow_typeof('2001-10-01'::date - '2001-09-28'::date)
1514
----
16-
Duration(s)
15+
Int64

datafusion/sqllogictest/test_files/datetime/dates.slt

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -94,13 +94,6 @@ caused by
9494
Error during planning: Cannot coerce arithmetic expression Timestamp(ns) + Utf8 to valid types
9595

9696

97-
# DATE minus DATE
98-
# https://github.com/apache/arrow-rs/issues/4383
99-
query ?
100-
SELECT DATE '2023-04-09' - DATE '2023-04-02';
101-
----
102-
7 days 0 hours 0 mins 0 secs
103-
10497
# DATE minus Timestamp
10598
query ?
10699
SELECT DATE '2023-04-09' - '2000-01-01T00:00:00'::timestamp;
@@ -113,17 +106,18 @@ SELECT '2023-01-01T00:00:00'::timestamp - DATE '2021-01-01';
113106
----
114107
730 days 0 hours 0 mins 0.000000000 secs
115108

116-
# NULL with DATE arithmetic should yield NULL
117-
query ?
109+
# NULL with DATE arithmetic should yield NULL (but Int64 type)
110+
query I
118111
SELECT NULL - DATE '1984-02-28';
119112
----
120113
NULL
121114

122-
query ?
115+
query I
123116
SELECT DATE '1984-02-28' - NULL
124117
----
125118
NULL
126119

120+
127121
# to_date_test
128122
statement ok
129123
create table to_date_t1(ts bigint) as VALUES

0 commit comments

Comments
 (0)