Skip to content

Commit 5a01e68

Browse files
authored
Update to_date udf function to support a consistent set of argument types (#19134)
## Which issue does this PR close?

<!-- We generally require a GitHub issue to be filed for all bug fixes and enhancements and this helps us generate change logs for our releases. You can link an issue to this PR using the GitHub syntax. For example `Closes #123` indicates that this PR will close issue #123. -->

- Closes #19120

## Rationale for this change

Improved type support for the `to_date` function.

## What changes are included in this PR?

Code, slt, updated docs.

## Are these changes tested?

Yes

## Are there any user-facing changes?

More types supported.
1 parent 8458946 commit 5a01e68

File tree

3 files changed

+140
-3
lines changed

3 files changed

+140
-3
lines changed

datafusion/functions/src/datetime/to_date.rs

Lines changed: 37 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,12 @@
1616
// under the License.
1717

1818
use crate::datetime::common::*;
19+
use arrow::compute::cast_with_options;
1920
use arrow::datatypes::DataType;
2021
use arrow::datatypes::DataType::*;
2122
use arrow::error::ArrowError::ParseError;
2223
use arrow::{array::types::Date32Type, compute::kernels::cast_utils::Parser};
24+
use datafusion_common::format::DEFAULT_CAST_OPTIONS;
2325
use datafusion_common::{Result, arrow_err, exec_err, internal_datafusion_err};
2426
use datafusion_expr::{
2527
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
@@ -30,7 +32,7 @@ use std::any::Any;
3032
#[user_doc(
3133
doc_section(label = "Time and Date Functions"),
3234
description = r"Converts a value to a date (`YYYY-MM-DD`).
33-
Supports strings, integer and double types as input.
35+
Supports strings, numeric and timestamp types as input.
3436
Strings are parsed as YYYY-MM-DD (e.g. '2023-07-20') if no [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are provided.
3537
Numeric values (integers, floats and decimals) are interpreted as days since the unix epoch (`1970-01-01T00:00:00Z`); any fractional part is discarded.
3638
Returns the corresponding date.
@@ -144,9 +146,42 @@ impl ScalarUDFImpl for ToDateFunc {
144146
}
145147

146148
match args[0].data_type() {
147-
Int32 | Int64 | Null | Float64 | Date32 | Date64 => {
149+
Null | Int32 | Int64 | Date32 | Date64 | Timestamp(_, _) => {
148150
args[0].cast_to(&Date32, None)
149151
}
152+
UInt8 | UInt16 | UInt32 | UInt64 | Int8 | Int16 => {
153+
// Arrow cast doesn't support direct casting of these types to date32
154+
// as it only supports Int32 and Int64. To work around that limitation,
155+
// use cast_with_options to cast to Int32 and then cast the result of
156+
// that to Date32.
157+
match &args[0] {
158+
ColumnarValue::Array(array) => {
159+
Ok(ColumnarValue::Array(cast_with_options(
160+
&cast_with_options(&array, &Int32, &DEFAULT_CAST_OPTIONS)?,
161+
&Date32,
162+
&DEFAULT_CAST_OPTIONS,
163+
)?))
164+
}
165+
ColumnarValue::Scalar(scalar) => {
166+
let sv =
167+
scalar.cast_to_with_options(&Int32, &DEFAULT_CAST_OPTIONS)?;
168+
Ok(ColumnarValue::Scalar(
169+
sv.cast_to_with_options(&Date32, &DEFAULT_CAST_OPTIONS)?,
170+
))
171+
}
172+
}
173+
}
174+
Float16
175+
| Float32
176+
| Float64
177+
| Decimal32(_, _)
178+
| Decimal64(_, _)
179+
| Decimal128(_, _)
180+
| Decimal256(_, _) => {
181+
// The only way this makes sense is to get the Int64 value of the float
182+
// or decimal and then cast that to Date32.
183+
args[0].cast_to(&Int64, None)?.cast_to(&Date32, None)
184+
}
150185
Utf8View | LargeUtf8 | Utf8 => self.to_date(&args),
151186
other => {
152187
exec_err!("Unsupported data type {} for function to_date", other)

datafusion/sqllogictest/test_files/dates.slt

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,12 +164,114 @@ SELECT to_date('21311111');
164164
statement error DataFusion error: Arrow error:
165165
SELECT to_date('213111111');
166166

167+
# verify date cast with tinyint input
168+
query DDDDDD
169+
SELECT to_date(null::tinyint), to_date(0::tinyint), to_date(19::tinyint), to_date(1::tinyint), to_date(-1::tinyint), to_date((0-1)::tinyint)
170+
----
171+
NULL 1970-01-01 1970-01-20 1970-01-02 1969-12-31 1969-12-31
172+
173+
# verify date cast with smallint input
174+
query DDDDDD
175+
SELECT to_date(null::smallint), to_date(0::smallint), to_date(19234::smallint), to_date(1::smallint), to_date(-1::smallint), to_date((0-1)::smallint)
176+
----
177+
NULL 1970-01-01 2022-08-30 1970-01-02 1969-12-31 1969-12-31
178+
167179
# verify date cast with integer input
168180
query DDDDDD
169181
SELECT to_date(null), to_date(0), to_date(19266320), to_date(1), to_date(-1), to_date(0-1)
170182
----
171183
NULL 1970-01-01 +54719-05-25 1970-01-02 1969-12-31 1969-12-31
172184

185+
# verify date cast with bigint input
186+
query DDDDDD
187+
SELECT to_date(null::bigint), to_date(0::bigint), to_date(191234::bigint), to_date(1::bigint), to_date(-1::bigint), to_date((0-1)::bigint)
188+
----
189+
NULL 1970-01-01 2493-07-31 1970-01-02 1969-12-31 1969-12-31
190+
191+
# verify date cast with unsigned tinyint input
192+
query DDDD
193+
SELECT to_date(null::tinyint unsigned), to_date(0::tinyint unsigned), to_date(192::tinyint unsigned), to_date(1::tinyint unsigned)
194+
----
195+
NULL 1970-01-01 1970-07-12 1970-01-02
196+
197+
# verify date cast with unsigned smallint input
198+
query DDDD
199+
SELECT to_date(null::smallint unsigned), to_date(0::smallint unsigned), to_date(19260::smallint unsigned), to_date(1::smallint unsigned)
200+
----
201+
NULL 1970-01-01 2022-09-25 1970-01-02
202+
203+
# verify date cast with unsigned int input
204+
query DDDD
205+
SELECT to_date(null::int unsigned), to_date(0::int unsigned), to_date(19260::int unsigned), to_date(1::int unsigned)
206+
----
207+
NULL 1970-01-01 2022-09-25 1970-01-02
208+
209+
# verify date cast with unsigned bigint input
210+
query DDDD
211+
SELECT to_date(null::bigint unsigned), to_date(0::bigint unsigned), to_date(19260000::bigint unsigned), to_date(1::bigint unsigned)
212+
----
213+
NULL 1970-01-01 +54702-02-03 1970-01-02
214+
215+
# verify date cast with real input (float32)
216+
query DDDDDD
217+
SELECT to_date(null::real), to_date(0.0::real), to_date(19260.1::real), to_date(1.1::real), to_date(-1.1::real), to_date(0-1.1::real)
218+
----
219+
NULL 1970-01-01 2022-09-25 1970-01-02 1969-12-31 1969-12-31
220+
221+
# verify date cast with double input (float64)
222+
query DDDDDD
223+
SELECT to_date(null::double), to_date(0.0::double), to_date(19260.1::double), to_date(1.1::double), to_date(-1.1::double), to_date(0-1.1::double)
224+
----
225+
NULL 1970-01-01 2022-09-25 1970-01-02 1969-12-31 1969-12-31
226+
227+
# verify date cast with Decimal32 input
228+
query DDDDDD
229+
SELECT to_date(arrow_cast(null, 'Decimal32(8,2)')), to_date(arrow_cast(0.0, 'Decimal32(8,2)')), to_date(arrow_cast(19260.1, 'Decimal32(8,2)')), to_date(arrow_cast(1.1, 'Decimal32(8,2)')), to_date(arrow_cast(-1.1, 'Decimal32(8,2)')), to_date(0-arrow_cast(1.1, 'Decimal32(8,2)'))
230+
----
231+
NULL 1970-01-01 2022-09-25 1970-01-02 1969-12-31 1969-12-31
232+
233+
# verify date cast with Decimal64 input
234+
query DDDDDD
235+
SELECT to_date(arrow_cast(null, 'Decimal64(8,2)')), to_date(arrow_cast(0.0, 'Decimal64(8,2)')), to_date(arrow_cast(19260.1, 'Decimal64(8,2)')), to_date(arrow_cast(1.1, 'Decimal64(8,2)')), to_date(arrow_cast(-1.1, 'Decimal64(8,2)')), to_date(0-arrow_cast(1.1, 'Decimal64(8,2)'))
236+
----
237+
NULL 1970-01-01 2022-09-25 1970-01-02 1969-12-31 1969-12-31
238+
239+
# verify date cast with Decimal128 input
240+
query DDDDDD
241+
SELECT to_date(arrow_cast(null, 'Decimal128(8,2)')), to_date(arrow_cast(0.0, 'Decimal128(8,2)')), to_date(arrow_cast(19260.1, 'Decimal128(8,2)')), to_date(arrow_cast(1.1, 'Decimal128(8,2)')), to_date(arrow_cast(-1.1, 'Decimal128(8,2)')), to_date(0-arrow_cast(1.1, 'Decimal128(8,2)'))
242+
----
243+
NULL 1970-01-01 2022-09-25 1970-01-02 1969-12-31 1969-12-31
244+
245+
# verify date cast with decimal input (Decimal128)
246+
query DDDDDD
247+
SELECT to_date(null::decimal(10,2)), to_date(0.0::decimal(10,2)), to_date(19260.1::decimal(10,2)), to_date(1.1::decimal(10,2)), to_date(-1.1::decimal(10,2)), to_date(0-1.1::decimal(10,2))
248+
----
249+
NULL 1970-01-01 2022-09-25 1970-01-02 1969-12-31 1969-12-31
250+
251+
# verify date cast with Decimal256 input
252+
query DDDDDD
253+
SELECT to_date(arrow_cast(null, 'Decimal256(8,2)')), to_date(arrow_cast(0.0, 'Decimal256(8,2)')), to_date(arrow_cast(19260.1, 'Decimal256(8,2)')), to_date(arrow_cast(1.1, 'Decimal256(8,2)')), to_date(arrow_cast(-1.1, 'Decimal256(8,2)')), to_date(0-arrow_cast(1.1, 'Decimal256(8,2)'))
254+
----
255+
NULL 1970-01-01 2022-09-25 1970-01-02 1969-12-31 1969-12-31
256+
257+
# verify date cast with date input
258+
query DDDD
259+
SELECT to_date('2024-12-1'::date), to_date('1920-01-12'::date), to_date('1970-01-01'::date), to_date('-0200-07-22'::date)
260+
----
261+
2024-12-01 1920-01-12 1970-01-01 -0200-07-22
262+
263+
# verify date cast with date64 input
264+
query DDDD
265+
SELECT to_date(arrow_cast('2024-12-1', 'Date64')), to_date(arrow_cast('1920-01-12', 'Date64')), to_date(arrow_cast('1970-01-01', 'Date64')), to_date(arrow_cast(-863999913600000, 'Date64'))
266+
----
267+
2024-12-01 1920-01-12 1970-01-01 -25410-12-07
268+
269+
# verify date cast with timestamp input
270+
query DD
271+
SELECT to_date('2024-12-01T00:32:45'::timestamp), to_date('1677-12-01T00:32:45'::timestamp)
272+
----
273+
2024-12-01 1677-12-01
274+
173275
# verify date output types
174276
query TTT
175277
SELECT arrow_typeof(to_date(1)), arrow_typeof(to_date(null)), arrow_typeof(to_date('2023-01-10 12:34:56.000'))

docs/source/user-guide/sql/scalar_functions.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2709,7 +2709,7 @@ Additional examples can be found [here](https://github.com/apache/datafusion/blo
27092709
### `to_date`
27102710

27112711
Converts a value to a date (`YYYY-MM-DD`).
2712-
Supports strings, integer and double types as input.
2712+
Supports strings, numeric and timestamp types as input.
27132713
Strings are parsed as YYYY-MM-DD (e.g. '2023-07-20') if no [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are provided.
27142714
Numeric values (integers, floats and decimals) are interpreted as days since the unix epoch (`1970-01-01T00:00:00Z`); any fractional part is discarded.
27152715
Returns the corresponding date.

0 commit comments

Comments
 (0)