|
| 1 | +// Licensed to the Apache Software Foundation (ASF) under one |
| 2 | +// or more contributor license agreements. See the NOTICE file |
| 3 | +// distributed with this work for additional information |
| 4 | +// regarding copyright ownership. The ASF licenses this file |
| 5 | +// to you under the Apache License, Version 2.0 (the |
| 6 | +// "License"); you may not use this file except in compliance |
| 7 | +// with the License. You may obtain a copy of the License at |
| 8 | +// |
| 9 | +// http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | +// |
| 11 | +// Unless required by applicable law or agreed to in writing, |
| 12 | +// software distributed under the License is distributed on an |
| 13 | +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| 14 | +// KIND, either express or implied. See the License for the |
| 15 | +// specific language governing permissions and limitations |
| 16 | +// under the License. |
| 17 | + |
| 18 | +use crate::datetime::common::*; |
| 19 | +use arrow::array::builder::PrimitiveBuilder; |
| 20 | +use arrow::array::cast::AsArray; |
| 21 | +use arrow::array::temporal_conversions::time_to_time64ns; |
| 22 | +use arrow::array::types::Time64NanosecondType; |
| 23 | +use arrow::array::{Array, PrimitiveArray, StringArrayType}; |
| 24 | +use arrow::datatypes::DataType; |
| 25 | +use arrow::datatypes::DataType::*; |
| 26 | +use chrono::NaiveTime; |
| 27 | +use datafusion_common::{Result, ScalarValue, exec_err}; |
| 28 | +use datafusion_expr::{ |
| 29 | + ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, |
| 30 | +}; |
| 31 | +use datafusion_macros::user_doc; |
| 32 | +use std::any::Any; |
| 33 | +use std::sync::Arc; |
| 34 | + |
| 35 | +/// Default time formats to try when parsing without an explicit format |
| 36 | +const DEFAULT_TIME_FORMATS: &[&str] = &[ |
| 37 | + "%H:%M:%S%.f", // 12:30:45.123456789 |
| 38 | + "%H:%M:%S", // 12:30:45 |
| 39 | + "%H:%M", // 12:30 |
| 40 | +]; |
| 41 | + |
| 42 | +#[user_doc( |
| 43 | + doc_section(label = "Time and Date Functions"), |
| 44 | + description = r"Converts a value to a time (`HH:MM:SS.nnnnnnnnn`). |
| 45 | +Supports strings and timestamps as input. |
| 46 | +Strings are parsed as `HH:MM:SS`, `HH:MM:SS.nnnnnnnnn`, or `HH:MM` if no [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are provided. |
| 47 | +Timestamps will have the time portion extracted. |
| 48 | +Returns the corresponding time. |
| 49 | +
|
| 50 | +Note: `to_time` returns Time64(Nanosecond), which represents the time of day in nanoseconds since midnight.", |
| 51 | + syntax_example = "to_time('12:30:45', '%H:%M:%S')", |
| 52 | + sql_example = r#"```sql |
| 53 | +> select to_time('12:30:45'); |
| 54 | ++---------------------------+ |
| 55 | +| to_time(Utf8("12:30:45")) | |
| 56 | ++---------------------------+ |
| 57 | +| 12:30:45 | |
| 58 | ++---------------------------+ |
| 59 | +> select to_time('12-30-45', '%H-%M-%S'); |
| 60 | ++--------------------------------------------+ |
| 61 | +| to_time(Utf8("12-30-45"),Utf8("%H-%M-%S")) | |
| 62 | ++--------------------------------------------+ |
| 63 | +| 12:30:45 | |
| 64 | ++--------------------------------------------+ |
| 65 | +> select to_time('2024-01-15 14:30:45'::timestamp); |
| 66 | ++--------------------------------------------------+ |
| 67 | +| to_time(Utf8("2024-01-15 14:30:45")) | |
| 68 | ++--------------------------------------------------+ |
| 69 | +| 14:30:45 | |
| 70 | ++--------------------------------------------------+ |
| 71 | +``` |
| 72 | +
|
| 73 | +Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/builtin_functions/date_time.rs) |
| 74 | +"#, |
| 75 | + standard_argument(name = "expression", prefix = "String or Timestamp"), |
| 76 | + argument( |
| 77 | + name = "format_n", |
| 78 | + description = r"Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order |
| 79 | + they appear with the first successful one being returned. If none of the formats successfully parse the expression |
| 80 | + an error will be returned." |
| 81 | + ) |
| 82 | +)] |
| 83 | +#[derive(Debug, PartialEq, Eq, Hash)] |
| 84 | +pub struct ToTimeFunc { |
| 85 | + signature: Signature, |
| 86 | +} |
| 87 | + |
| 88 | +impl Default for ToTimeFunc { |
| 89 | + fn default() -> Self { |
| 90 | + Self::new() |
| 91 | + } |
| 92 | +} |
| 93 | + |
| 94 | +impl ToTimeFunc { |
| 95 | + pub fn new() -> Self { |
| 96 | + Self { |
| 97 | + signature: Signature::variadic_any(Volatility::Immutable), |
| 98 | + } |
| 99 | + } |
| 100 | +} |
| 101 | + |
| 102 | +impl ScalarUDFImpl for ToTimeFunc { |
| 103 | + fn as_any(&self) -> &dyn Any { |
| 104 | + self |
| 105 | + } |
| 106 | + |
| 107 | + fn name(&self) -> &str { |
| 108 | + "to_time" |
| 109 | + } |
| 110 | + |
| 111 | + fn signature(&self) -> &Signature { |
| 112 | + &self.signature |
| 113 | + } |
| 114 | + |
| 115 | + fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> { |
| 116 | + Ok(Time64(arrow::datatypes::TimeUnit::Nanosecond)) |
| 117 | + } |
| 118 | + |
| 119 | + fn invoke_with_args( |
| 120 | + &self, |
| 121 | + args: datafusion_expr::ScalarFunctionArgs, |
| 122 | + ) -> Result<ColumnarValue> { |
| 123 | + let args = args.args; |
| 124 | + if args.is_empty() { |
| 125 | + return exec_err!("to_time function requires 1 or more arguments, got 0"); |
| 126 | + } |
| 127 | + |
| 128 | + // validate that any args after the first one are Utf8 |
| 129 | + if args.len() > 1 { |
| 130 | + validate_data_types(&args, "to_time")?; |
| 131 | + } |
| 132 | + |
| 133 | + match args[0].data_type() { |
| 134 | + Utf8View | LargeUtf8 | Utf8 => string_to_time(&args), |
| 135 | + Null => Ok(ColumnarValue::Scalar(ScalarValue::Time64Nanosecond(None))), |
| 136 | + // Support timestamp input by extracting time portion using Arrow cast |
| 137 | + Timestamp(_, _) => timestamp_to_time(&args[0]), |
| 138 | + other => { |
| 139 | + exec_err!("Unsupported data type {} for function to_time", other) |
| 140 | + } |
| 141 | + } |
| 142 | + } |
| 143 | + |
| 144 | + fn documentation(&self) -> Option<&Documentation> { |
| 145 | + self.doc() |
| 146 | + } |
| 147 | +} |
| 148 | + |
| 149 | +/// Convert string arguments to time (standalone function, not a method on ToTimeFunc) |
| 150 | +fn string_to_time(args: &[ColumnarValue]) -> Result<ColumnarValue> { |
| 151 | + let formats = collect_formats(args)?; |
| 152 | + |
| 153 | + match &args[0] { |
| 154 | + ColumnarValue::Scalar(ScalarValue::Utf8(s)) |
| 155 | + | ColumnarValue::Scalar(ScalarValue::LargeUtf8(s)) |
| 156 | + | ColumnarValue::Scalar(ScalarValue::Utf8View(s)) => { |
| 157 | + let result = s |
| 158 | + .as_ref() |
| 159 | + .map(|s| parse_time_with_formats(s, &formats)) |
| 160 | + .transpose()?; |
| 161 | + Ok(ColumnarValue::Scalar(ScalarValue::Time64Nanosecond(result))) |
| 162 | + } |
| 163 | + ColumnarValue::Array(array) => { |
| 164 | + let result = match array.data_type() { |
| 165 | + Utf8 => parse_time_array(&array.as_string::<i32>(), &formats)?, |
| 166 | + LargeUtf8 => parse_time_array(&array.as_string::<i64>(), &formats)?, |
| 167 | + Utf8View => parse_time_array(&array.as_string_view(), &formats)?, |
| 168 | + other => return exec_err!("Unsupported type for to_time: {other}"), |
| 169 | + }; |
| 170 | + Ok(ColumnarValue::Array(Arc::new(result))) |
| 171 | + } |
| 172 | + other => exec_err!("Unsupported argument for to_time: {other:?}"), |
| 173 | + } |
| 174 | +} |
| 175 | + |
| 176 | +/// Collect format strings from arguments, erroring on non-scalar inputs |
| 177 | +fn collect_formats(args: &[ColumnarValue]) -> Result<Vec<&str>> { |
| 178 | + if args.len() <= 1 { |
| 179 | + return Ok(DEFAULT_TIME_FORMATS.to_vec()); |
| 180 | + } |
| 181 | + |
| 182 | + let mut formats = Vec::with_capacity(args.len() - 1); |
| 183 | + for (i, arg) in args[1..].iter().enumerate() { |
| 184 | + match arg { |
| 185 | + ColumnarValue::Scalar(ScalarValue::Utf8(Some(s))) |
| 186 | + | ColumnarValue::Scalar(ScalarValue::LargeUtf8(Some(s))) |
| 187 | + | ColumnarValue::Scalar(ScalarValue::Utf8View(Some(s))) => { |
| 188 | + formats.push(s.as_str()); |
| 189 | + } |
| 190 | + ColumnarValue::Scalar(ScalarValue::Utf8(None)) |
| 191 | + | ColumnarValue::Scalar(ScalarValue::LargeUtf8(None)) |
| 192 | + | ColumnarValue::Scalar(ScalarValue::Utf8View(None)) => { |
| 193 | + // Skip null format strings |
| 194 | + } |
| 195 | + ColumnarValue::Array(_) => { |
| 196 | + return exec_err!( |
| 197 | + "to_time format argument {} must be a scalar, not an array", |
| 198 | + i + 2 // argument position (1-indexed, +1 for the first arg) |
| 199 | + ); |
| 200 | + } |
| 201 | + other => { |
| 202 | + return exec_err!( |
| 203 | + "to_time format argument {} has unsupported type: {:?}", |
| 204 | + i + 2, |
| 205 | + other.data_type() |
| 206 | + ); |
| 207 | + } |
| 208 | + } |
| 209 | + } |
| 210 | + Ok(formats) |
| 211 | +} |
| 212 | + |
| 213 | +/// Extract time portion from timestamp using Arrow cast kernel |
| 214 | +fn timestamp_to_time(arg: &ColumnarValue) -> Result<ColumnarValue> { |
| 215 | + arg.cast_to(&Time64(arrow::datatypes::TimeUnit::Nanosecond), None) |
| 216 | +} |
| 217 | + |
| 218 | +/// Parse time array using the provided formats |
| 219 | +fn parse_time_array<'a, A: StringArrayType<'a>>( |
| 220 | + array: &A, |
| 221 | + formats: &[&str], |
| 222 | +) -> Result<PrimitiveArray<Time64NanosecondType>> { |
| 223 | + let mut builder: PrimitiveBuilder<Time64NanosecondType> = |
| 224 | + PrimitiveArray::builder(array.len()); |
| 225 | + |
| 226 | + for i in 0..array.len() { |
| 227 | + if array.is_null(i) { |
| 228 | + builder.append_null(); |
| 229 | + } else { |
| 230 | + let s = array.value(i); |
| 231 | + let nanos = parse_time_with_formats(s, formats)?; |
| 232 | + builder.append_value(nanos); |
| 233 | + } |
| 234 | + } |
| 235 | + |
| 236 | + Ok(builder.finish()) |
| 237 | +} |
| 238 | + |
| 239 | +/// Parse time string using provided formats |
| 240 | +fn parse_time_with_formats(s: &str, formats: &[&str]) -> Result<i64> { |
| 241 | + for format in formats { |
| 242 | + if let Ok(time) = NaiveTime::parse_from_str(s, format) { |
| 243 | + // Use Arrow's time_to_time64ns function instead of custom implementation |
| 244 | + return Ok(time_to_time64ns(time)); |
| 245 | + } |
| 246 | + } |
| 247 | + exec_err!( |
| 248 | + "Error parsing '{}' as time. Tried formats: {:?}", |
| 249 | + s, |
| 250 | + formats |
| 251 | + ) |
| 252 | +} |
0 commit comments