Skip to content

Commit 6274080

Browse files
Update to_unixtime udf function to support a consistent set of argument types (#19442)
## Which issue does this PR close?

<!-- We generally require a GitHub issue to be filed for all bug fixes and enhancements and this helps us generate change logs for our releases. You can link an issue to this PR using the GitHub syntax. For example `Closes #123` indicates that this PR will close issue #123. -->

- Closes #19119

## Rationale for this change

`to_unixtime` lacks support for several data types.

<!-- Why are you proposing this change? If this is already explained clearly in the issue then this section is not needed. Explaining clearly why changes are proposed helps reviewers understand your changes and offer better suggestions for fixes. -->

## What changes are included in this PR?

Expanded `to_unixtime` support to all signed ints (Int8/16/32/64), all unsigned ints (UInt8/16/32/64), all floats (Float16/32/64), and all UTF8 variants (Utf8/Utf8View/LargeUtf8).

<!-- There is no need to duplicate the description in the issue here but it is sometimes worth providing a summary of the individual changes in this PR. -->

## Are these changes tested?

Added sqllogictest cases.

<!-- We typically require tests for all PRs in order to:
1. Prevent the code from being accidentally broken by subsequent changes
2. Serve as another way to document the expected behavior of the code

If tests are not included in your PR, please explain why (for example, are they covered by existing tests)? -->

## Are there any user-facing changes?

<!-- If there are user-facing changes then we may require documentation to be updated before approving the PR. -->

<!-- If there are any breaking changes to public APIs, please add the `api change` label. -->

---------

Co-authored-by: Martin Grigorov <[email protected]>
1 parent 058bcb0 commit 6274080

File tree

4 files changed

+90
-10
lines changed

4 files changed

+90
-10
lines changed

datafusion/functions/src/datetime/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ pub mod expr_fn {
108108
),
109109
(
110110
to_unixtime,
111-
"converts a string and optional formats to a Unixtime",
111+
"converts a value to seconds since the unix epoch",
112112
args,
113113
),(
114114
to_timestamp,

datafusion/functions/src/datetime/to_unixtime.rs

Lines changed: 30 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ use std::any::Any;
2727

2828
#[user_doc(
2929
doc_section(label = "Time and Date Functions"),
30-
description = "Converts a value to seconds since the unix epoch (`1970-01-01T00:00:00Z`). Supports strings, dates, timestamps and double types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono formats](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) are provided.",
30+
description = "Converts a value to seconds since the unix epoch (`1970-01-01T00:00:00`). Supports strings, dates, timestamps, integer, unsigned integer, and float types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono formats](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) are provided. Integers, unsigned integers, and floats are interpreted as seconds since the unix epoch (`1970-01-01T00:00:00`).",
3131
syntax_example = "to_unixtime(expression[, ..., format_n])",
3232
sql_example = r#"
3333
```sql
@@ -101,22 +101,44 @@ impl ScalarUDFImpl for ToUnixtimeFunc {
101101

102102
// validate that any args after the first one are Utf8
103103
if arg_args.len() > 1 {
104-
validate_data_types(arg_args, "to_unixtime")?;
104+
// Format arguments only make sense for string inputs
105+
match arg_args[0].data_type() {
106+
DataType::Utf8View | DataType::LargeUtf8 | DataType::Utf8 => {
107+
validate_data_types(arg_args, "to_unixtime")?;
108+
}
109+
_ => {
110+
return exec_err!(
111+
"to_unixtime function only accepts format arguments with string input, got {} arguments",
112+
arg_args.len()
113+
);
114+
}
115+
}
105116
}
106117

107118
match arg_args[0].data_type() {
108-
DataType::Int32 | DataType::Int64 | DataType::Null | DataType::Float64 => {
109-
arg_args[0].cast_to(&DataType::Int64, None)
110-
}
119+
DataType::Int8
120+
| DataType::Int16
121+
| DataType::Int32
122+
| DataType::Int64
123+
| DataType::UInt8
124+
| DataType::UInt16
125+
| DataType::UInt32
126+
| DataType::UInt64
127+
| DataType::Float16
128+
| DataType::Float32
129+
| DataType::Float64
130+
| DataType::Null => arg_args[0].cast_to(&DataType::Int64, None),
111131
DataType::Date64 | DataType::Date32 => arg_args[0]
112132
.cast_to(&DataType::Timestamp(TimeUnit::Second, None), None)?
113133
.cast_to(&DataType::Int64, None),
114134
DataType::Timestamp(_, tz) => arg_args[0]
115135
.cast_to(&DataType::Timestamp(TimeUnit::Second, tz), None)?
116136
.cast_to(&DataType::Int64, None),
117-
DataType::Utf8 => ToTimestampSecondsFunc::new()
118-
.invoke_with_args(args)?
119-
.cast_to(&DataType::Int64, None),
137+
DataType::Utf8View | DataType::LargeUtf8 | DataType::Utf8 => {
138+
ToTimestampSecondsFunc::new()
139+
.invoke_with_args(args)?
140+
.cast_to(&DataType::Int64, None)
141+
}
120142
other => {
121143
exec_err!("Unsupported data type {} for function to_unixtime", other)
122144
}

datafusion/sqllogictest/test_files/timestamps.slt

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3526,6 +3526,64 @@ select to_unixtime(arrow_cast(1599523200.414, 'Float64'));
35263526
----
35273527
1599523200
35283528

3529+
query I
3530+
select to_unixtime(arrow_cast(-1, 'Int8'));
3531+
----
3532+
-1
3533+
3534+
query I
3535+
select to_unixtime(arrow_cast(null, 'Int8'));
3536+
----
3537+
NULL
3538+
3539+
query I
3540+
select to_unixtime(arrow_cast(1000, 'Int16'));
3541+
----
3542+
1000
3543+
3544+
query I
3545+
select to_unixtime(arrow_cast(255, 'UInt8'));
3546+
----
3547+
255
3548+
3549+
query I
3550+
select to_unixtime(arrow_cast(65535, 'UInt16'));
3551+
----
3552+
65535
3553+
3554+
query I
3555+
select to_unixtime(arrow_cast(1599523200, 'UInt32'));
3556+
----
3557+
1599523200
3558+
3559+
query I
3560+
select to_unixtime(arrow_cast(1599523200, 'UInt64'));
3561+
----
3562+
1599523200
3563+
3564+
query error DataFusion error: Arrow error: Cast error: Can't cast value 18446744073709551615 to type Int64
3565+
select to_unixtime(arrow_cast(18446744073709551615, 'UInt64'));
3566+
3567+
query I
3568+
select to_unixtime(arrow_cast(1000.12, 'Float16'));
3569+
----
3570+
1000
3571+
3572+
query I
3573+
select to_unixtime(arrow_cast(1000.414, 'Float32'));
3574+
----
3575+
1000
3576+
3577+
query I
3578+
select to_unixtime(arrow_cast('2020-09-08T12:00:00+00:00', 'Utf8View'));
3579+
----
3580+
1599566400
3581+
3582+
query I
3583+
select to_unixtime(arrow_cast('2020-09-08T12:00:00+00:00', 'LargeUtf8'));
3584+
----
3585+
1599566400
3586+
35293587
##########
35303588
## Tests for the "AT TIME ZONE" clause
35313589
##########

docs/source/user-guide/sql/scalar_functions.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2982,7 +2982,7 @@ Additional examples can be found [here](https://github.com/apache/datafusion/blo
29822982

29832983
### `to_unixtime`
29842984

2985-
Converts a value to seconds since the unix epoch (`1970-01-01T00:00:00Z`). Supports strings, dates, timestamps and double types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono formats](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) are provided.
2985+
Converts a value to seconds since the unix epoch (`1970-01-01T00:00:00`). Supports strings, dates, timestamps, integer, unsigned integer, and float types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono formats](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) are provided. Integers, unsigned integers, and floats are interpreted as seconds since the unix epoch (`1970-01-01T00:00:00`).
29862986

29872987
```sql
29882988
to_unixtime(expression[, ..., format_n])

0 commit comments

Comments
 (0)