1515// specific language governing permissions and limitations
1616// under the License.
1717
18- use std:: sync:: { Arc , LazyLock } ;
18+ use std:: sync:: Arc ;
1919
20- use arrow:: array:: timezone:: Tz ;
2120use arrow:: array:: {
2221 Array , ArrowPrimitiveType , AsArray , GenericStringArray , PrimitiveArray ,
2322 StringArrayType , StringViewArray ,
2423} ;
2524use arrow:: compute:: DecimalCast ;
26- use arrow:: compute:: kernels:: cast_utils:: string_to_datetime;
2725use arrow:: datatypes:: { DataType , TimeUnit } ;
2826use arrow_buffer:: ArrowNativeType ;
29- use chrono:: LocalResult :: Single ;
30- use chrono:: format:: { Parsed , StrftimeItems , parse} ;
31- use chrono:: { DateTime , TimeZone , Utc } ;
3227use datafusion_common:: cast:: as_generic_string_array;
3328use datafusion_common:: {
34- DataFusionError , Result , ScalarValue , exec_datafusion_err, exec_err,
35- internal_datafusion_err, unwrap_or_internal_err,
29+ Result , ScalarValue , exec_err, internal_datafusion_err, unwrap_or_internal_err,
3630} ;
3731use datafusion_expr:: ColumnarValue ;
3832
39- /// Error message if nanosecond conversion request beyond supported interval
40- const ERR_NANOSECONDS_NOT_SUPPORTED : & str = "The dates that can be represented as nanoseconds have to be between 1677-09-21T00:12:44.0 and 2262-04-11T23:47:16.854775804" ;
41-
42- static UTC : LazyLock < Tz > = LazyLock :: new ( || "UTC" . parse ( ) . expect ( "UTC is always valid" ) ) ;
43-
44- /// Converts a string representation of a date‑time into a timestamp expressed in
45- /// nanoseconds since the Unix epoch.
46- ///
47- /// This helper is a thin wrapper around the more general `string_to_datetime`
48- /// function. It accepts an optional `timezone` which, if `None`, defaults to
49- /// Coordinated Universal Time (UTC). The string `s` must contain a valid
50- /// date‑time format that can be parsed by the underlying chrono parser.
51- ///
52- /// # Return Value
53- ///
54- /// * `Ok(i64)` – The number of nanoseconds since `1970‑01‑01T00:00:00Z`.
55- /// * `Err(DataFusionError)` – If the string cannot be parsed, the parsed
56- /// value is out of range (between 1677-09-21T00:12:44.0 and 2262-04-11T23:47:16.854775804)
57- /// or the parsed value does not correspond to an unambiguous time.
58- pub ( crate ) fn string_to_timestamp_nanos_with_timezone (
59- timezone : & Option < Tz > ,
60- s : & str ,
61- ) -> Result < i64 > {
62- let tz = timezone. as_ref ( ) . unwrap_or ( & UTC ) ;
63- let dt = string_to_datetime ( tz, s) ?;
64- let parsed = dt
65- . timestamp_nanos_opt ( )
66- . ok_or_else ( || exec_datafusion_err ! ( "{ERR_NANOSECONDS_NOT_SUPPORTED}" ) ) ?;
67-
68- Ok ( parsed)
69- }
70-
7133/// Checks that all the arguments from the second onward are of type [Utf8], [LargeUtf8] or [Utf8View]
7234///
7335/// [Utf8]: DataType::Utf8
@@ -92,161 +54,6 @@ pub(crate) fn validate_data_types(args: &[ColumnarValue], name: &str) -> Result<
9254 Ok ( ( ) )
9355}
9456
95- /// Accepts a string and parses it using the [`chrono::format::strftime`] specifiers
96- /// relative to the provided `timezone`
97- ///
98- /// If a timestamp is ambiguous, for example as a result of daylight-savings time, an error
99- /// will be returned
100- ///
101- /// Note that parsing [IANA timezones] is not supported yet in chrono - <https://github.com/chronotope/chrono/issues/38>
102- /// and this implementation only supports named timezones at the end of the string preceded by a space.
103- ///
104- /// [`chrono::format::strftime`]: https://docs.rs/chrono/latest/chrono/format/strftime/index.html
105- /// [IANA timezones]: https://www.iana.org/time-zones
106- pub ( crate ) fn string_to_datetime_formatted < T : TimeZone > (
107- timezone : & T ,
108- s : & str ,
109- format : & str ,
110- ) -> Result < DateTime < T > , DataFusionError > {
111- let err = |err_ctx : & str | {
112- exec_datafusion_err ! (
113- "Error parsing timestamp from '{s}' using format '{format}': {err_ctx}"
114- )
115- } ;
116-
117- let mut datetime_str = s;
118- let mut format = format;
119-
120- // Manually handle the most common case of a named timezone at the end of the timestamp.
121- // Note that %+ handles 'Z' at the end of the string without a space. This code doesn't
122- // handle named timezones with no preceding space since that would require writing a
123- // custom parser (or switching to Jiff)
124- let tz: Option < chrono_tz:: Tz > = if format. trim_end ( ) . ends_with ( " %Z" ) {
125- // grab the string after the last space as the named timezone
126- if let Some ( ( dt_str, timezone_name) ) = datetime_str. trim_end ( ) . rsplit_once ( ' ' ) {
127- datetime_str = dt_str;
128-
129- // attempt to parse the timezone name
130- let result: Result < chrono_tz:: Tz , chrono_tz:: ParseError > =
131- timezone_name. parse ( ) ;
132- let Ok ( tz) = result else {
133- return Err ( err ( & result. unwrap_err ( ) . to_string ( ) ) ) ;
134- } ;
135-
136- // successfully parsed the timezone name, remove the ' %Z' from the format
137- format = & format[ ..format. len ( ) - 3 ] ;
138-
139- Some ( tz)
140- } else {
141- None
142- }
143- } else if format. contains ( "%Z" ) {
144- return Err ( err (
145- "'%Z' is only supported at the end of the format string preceded by a space" ,
146- ) ) ;
147- } else {
148- None
149- } ;
150-
151- let mut parsed = Parsed :: new ( ) ;
152- parse ( & mut parsed, datetime_str, StrftimeItems :: new ( format) )
153- . map_err ( |e| err ( & e. to_string ( ) ) ) ?;
154-
155- let dt = match tz {
156- Some ( tz) => {
157- // A timezone was manually parsed out, convert it to a fixed offset
158- match parsed. to_datetime_with_timezone ( & tz) {
159- Ok ( dt) => Ok ( dt. fixed_offset ( ) ) ,
160- Err ( e) => Err ( e) ,
161- }
162- }
163- // default to parse the string assuming it has a timezone
164- None => parsed. to_datetime ( ) ,
165- } ;
166-
167- if let Err ( e) = & dt {
168- // no timezone or other failure, try without a timezone
169- let ndt = parsed
170- . to_naive_datetime_with_offset ( 0 )
171- . or_else ( |_| parsed. to_naive_date ( ) . map ( |nd| nd. into ( ) ) ) ;
172- if let Err ( e) = & ndt {
173- return Err ( err ( & e. to_string ( ) ) ) ;
174- }
175-
176- if let Single ( e) = & timezone. from_local_datetime ( & ndt. unwrap ( ) ) {
177- Ok ( e. to_owned ( ) )
178- } else {
179- Err ( err ( & e. to_string ( ) ) )
180- }
181- } else {
182- Ok ( dt. unwrap ( ) . with_timezone ( timezone) )
183- }
184- }
185-
186- /// Accepts a string with a `chrono` format and converts it to a
187- /// nanosecond precision timestamp relative to the provided `timezone`.
188- ///
189- /// See [`chrono::format::strftime`] for the full set of supported formats.
190- ///
191- /// Implements the `to_timestamp` function to convert a string to a
192- /// timestamp, following the model of Spark SQL’s `to_timestamp`.
193- ///
194- /// Internally, this function uses the `chrono` library for the
195- /// datetime parsing
196- ///
197- /// ## Timestamp Precision
198- ///
199- /// Function uses the maximum precision timestamps supported by
200- /// Arrow (nanoseconds stored as a 64-bit integer) timestamps. This
201- /// means the range of dates that timestamps can represent is ~1677 AD
202- /// to 2262 AD
203- ///
204- /// ## Timezone / Offset Handling
205- ///
206- /// Numerical values of timestamps are stored compared to offset UTC.
207- ///
208- /// Any timestamp in the formatting string is handled according to the rules
209- /// defined by `chrono`.
210- ///
211- /// [`chrono::format::strftime`]: https://docs.rs/chrono/latest/chrono/format/strftime/index.html
212- #[ inline]
213- pub ( crate ) fn string_to_timestamp_nanos_formatted_with_timezone (
214- timezone : & Option < Tz > ,
215- s : & str ,
216- format : & str ,
217- ) -> Result < i64 , DataFusionError > {
218- let dt = string_to_datetime_formatted ( timezone. as_ref ( ) . unwrap_or ( & UTC ) , s, format) ?;
219- let parsed = dt
220- . timestamp_nanos_opt ( )
221- . ok_or_else ( || exec_datafusion_err ! ( "{ERR_NANOSECONDS_NOT_SUPPORTED}" ) ) ?;
222-
223- Ok ( parsed)
224- }
225-
226- /// Accepts a string with a `chrono` format and converts it to a
227- /// millisecond precision timestamp relative to the provided `timezone`.
228- ///
229- /// See [`chrono::format::strftime`] for the full set of supported formats.
230- ///
231- /// Internally, this function uses the `chrono` library for the
232- /// datetime parsing
233- ///
234- /// ## Timezone / Offset Handling
235- ///
236- /// Numerical values of timestamps are stored compared to offset UTC.
237- ///
238- /// Any timestamp in the formatting string is handled according to the rules
239- /// defined by `chrono`.
240- ///
241- /// [`chrono::format::strftime`]: https://docs.rs/chrono/latest/chrono/format/strftime/index.html
242- #[ inline]
243- pub ( crate ) fn string_to_timestamp_millis_formatted ( s : & str , format : & str ) -> Result < i64 > {
244- Ok ( string_to_datetime_formatted ( & Utc , s, format) ?
245- . naive_utc ( )
246- . and_utc ( )
247- . timestamp_millis ( ) )
248- }
249-
25057pub ( crate ) fn handle < O , F > (
25158 args : & [ ColumnarValue ] ,
25259 op : F ,
@@ -306,7 +113,7 @@ pub(crate) fn handle_multiple<O, F, M>(
306113) -> Result < ColumnarValue >
307114where
308115 O : ArrowPrimitiveType ,
309- F : Fn ( & str , & str ) -> Result < O :: Native > ,
116+ F : Fn ( & str , & [ & str ] ) -> Result < O :: Native > ,
310117 M : Fn ( O :: Native ) -> O :: Native ,
311118{
312119 match & args[ 0 ] {
@@ -372,7 +179,7 @@ where
372179 } ;
373180
374181 if let Some ( s) = x {
375- match op ( a, s. as_str ( ) ) {
182+ match op ( a, & [ s. as_str ( ) ] ) {
376183 Ok ( r) => {
377184 let result = op2 ( r) . to_i64 ( ) ;
378185 let s = scalar_value ( dt, result) ?;
@@ -411,7 +218,7 @@ pub(crate) fn strings_to_primitive_function<O, F, F2>(
411218) -> Result < PrimitiveArray < O > >
412219where
413220 O : ArrowPrimitiveType ,
414- F : Fn ( & str , & str ) -> Result < O :: Native > ,
221+ F : Fn ( & str , & [ & str ] ) -> Result < O :: Native > ,
415222 F2 : Fn ( O :: Native ) -> O :: Native ,
416223{
417224 if args. len ( ) < 2 {
@@ -472,7 +279,7 @@ fn handle_array_op<'a, O, V, F, F2>(
472279where
473280 V : StringArrayType < ' a > ,
474281 O : ArrowPrimitiveType ,
475- F : Fn ( & str , & str ) -> Result < O :: Native > ,
282+ F : Fn ( & str , & [ & str ] ) -> Result < O :: Native > ,
476283 F2 : Fn ( O :: Native ) -> O :: Native ,
477284{
478285 first
@@ -481,28 +288,39 @@ where
481288 . map ( |( pos, x) | {
482289 let mut val = None ;
483290 if let Some ( x) = x {
291+ let mut v = vec ! [ ] ;
292+
484293 for arg in args {
485- let v = match arg {
294+ match arg {
486295 ColumnarValue :: Array ( a) => match a. data_type ( ) {
487- DataType :: Utf8View => Ok ( a. as_string_view ( ) . value ( pos) ) ,
488- DataType :: LargeUtf8 => Ok ( a. as_string :: < i64 > ( ) . value ( pos) ) ,
489- DataType :: Utf8 => Ok ( a. as_string :: < i32 > ( ) . value ( pos) ) ,
490- other => exec_err ! ( "Unexpected type encountered '{other}'" ) ,
296+ DataType :: Utf8View => v. push ( a. as_string_view ( ) . value ( pos) ) ,
297+ DataType :: LargeUtf8 => {
298+ v. push ( a. as_string :: < i64 > ( ) . value ( pos) )
299+ }
300+ DataType :: Utf8 => v. push ( a. as_string :: < i32 > ( ) . value ( pos) ) ,
301+ other => {
302+ return exec_err ! (
303+ "Unexpected type encountered '{other}'"
304+ ) ;
305+ }
491306 } ,
492307 ColumnarValue :: Scalar ( s) => match s. try_as_str ( ) {
493- Some ( Some ( v ) ) => Ok ( v ) ,
308+ Some ( Some ( s ) ) => v . push ( s ) ,
494309 Some ( None ) => continue , // null string
495- None => exec_err ! ( "Unexpected scalar type encountered '{s}'" ) ,
310+ None => {
311+ return exec_err ! (
312+ "Unexpected scalar type encountered '{s}'"
313+ ) ;
314+ }
496315 } ,
497- } ?;
316+ } ;
317+ }
498318
499- let r = op ( x, v) ;
500- if let Ok ( inner) = r {
501- val = Some ( Ok ( op2 ( inner) ) ) ;
502- break ;
503- } else {
504- val = Some ( r) ;
505- }
319+ let r = op ( x, & v) ;
320+ if let Ok ( inner) = r {
321+ val = Some ( Ok ( op2 ( inner) ) ) ;
322+ } else {
323+ val = Some ( r) ;
506324 }
507325 } ;
508326
0 commit comments