1515// specific language governing permissions and limitations
1616// under the License.
1717
18+ use std:: marker:: PhantomData ;
1819use std:: sync:: Arc ;
1920
21+ use arrow:: array:: timezone:: Tz ;
2022use arrow:: array:: {
2123 Array , ArrowPrimitiveType , AsArray , GenericStringArray , PrimitiveArray ,
2224 StringArrayType , StringViewArray ,
2325} ;
24- use arrow:: compute:: kernels:: cast_utils:: string_to_timestamp_nanos;
25- use arrow:: datatypes:: DataType ;
26+ use arrow:: compute:: kernels:: cast_utils:: {
27+ string_to_datetime, string_to_timestamp_nanos,
28+ } ;
29+ use arrow:: datatypes:: { DataType , TimeUnit } ;
30+ use arrow_buffer:: ArrowNativeType ;
2631use chrono:: format:: { parse, Parsed , StrftimeItems } ;
2732use chrono:: LocalResult :: Single ;
2833use chrono:: { DateTime , TimeZone , Utc } ;
29-
3034use datafusion_common:: cast:: as_generic_string_array;
3135use datafusion_common:: {
32- exec_datafusion_err, exec_err, unwrap_or_internal_err , DataFusionError , Result ,
33- ScalarType , ScalarValue ,
36+ exec_datafusion_err, exec_err, internal_datafusion_err , unwrap_or_internal_err ,
37+ DataFusionError , Result , ScalarValue ,
3438} ;
3539use datafusion_expr:: ColumnarValue ;
40+ use num_traits:: { PrimInt , ToPrimitive } ;
3641
/// Error message returned when a timestamp cannot be represented with
/// nanosecond precision, i.e. when `timestamp_nanos_opt()` yields `None`
/// because the value lies outside the supported interval.
const ERR_NANOSECONDS_NOT_SUPPORTED: &str = "The dates that can be represented as nanoseconds have to be between 1677-09-21T00:12:44.0 and 2262-04-11T23:47:16.854775804";
3944
45+ #[ expect( unused) ]
4046/// Calls string_to_timestamp_nanos and converts the error type
4147pub ( crate ) fn string_to_timestamp_nanos_shim ( s : & str ) -> Result < i64 > {
4248 string_to_timestamp_nanos ( s) . map_err ( |e| e. into ( ) )
4349}
4450
51+ pub ( crate ) fn string_to_timestamp_nanos_with_timezone (
52+ timezone : & Option < Tz > ,
53+ s : & str ,
54+ ) -> Result < i64 > {
55+ let tz = timezone. unwrap_or ( "UTC" . parse ( ) ?) ;
56+ let dt = string_to_datetime ( & tz, s) ?;
57+ let parsed = dt
58+ . timestamp_nanos_opt ( )
59+ . ok_or_else ( || exec_datafusion_err ! ( "{ERR_NANOSECONDS_NOT_SUPPORTED}" ) ) ?;
60+
61+ Ok ( parsed)
62+ }
63+
4564/// Checks that all the arguments from the second are of type [Utf8], [LargeUtf8] or [Utf8View]
4665///
4766/// [Utf8]: DataType::Utf8
@@ -69,13 +88,12 @@ pub(crate) fn validate_data_types(args: &[ColumnarValue], name: &str) -> Result<
6988/// Accepts a string and parses it using the [`chrono::format::strftime`] specifiers
7089/// relative to the provided `timezone`
7190///
72- /// [IANA timezones] are only supported if the `arrow-array/chrono-tz` feature is enabled
73- ///
74- /// * `2023-01-01 040506 America/Los_Angeles`
75- ///
7691/// If a timestamp is ambiguous, for example as a result of daylight-savings time, an error
7792/// will be returned
7893///
94+ /// Note that parsing [IANA timezones] is not supported yet in chrono - <https://github.com/chronotope/chrono/issues/38>
95+ /// and this implementation only supports named timezones at the end of the string preceded by a space.
96+ ///
7997/// [`chrono::format::strftime`]: https://docs.rs/chrono/latest/chrono/format/strftime/index.html
8098/// [IANA timezones]: https://www.iana.org/time-zones
8199pub ( crate ) fn string_to_datetime_formatted < T : TimeZone > (
@@ -89,11 +107,52 @@ pub(crate) fn string_to_datetime_formatted<T: TimeZone>(
89107 )
90108 } ;
91109
110+ let mut datetime_str = s;
111+ let mut format = format;
112+
113+ // we manually handle the most common case of a named timezone at the end of the timestamp
114+ // note that %+ handles 'Z' at the end of the string without a space. This code doesn't
115+ // handle named timezones with no preceding space since that would require writing a
116+ // custom parser (or switching to Jiff)
117+ let tz: Option < chrono_tz:: Tz > = if format. ends_with ( " %Z" ) {
118+ // grab the string after the last space as the named timezone
119+ let parts: Vec < & str > = datetime_str. rsplitn ( 2 , ' ' ) . collect ( ) ;
120+ let timezone_name = parts[ 0 ] ;
121+ datetime_str = parts[ 1 ] ;
122+
123+ // attempt to parse the timezone name
124+ let result: Result < chrono_tz:: Tz , chrono_tz:: ParseError > = timezone_name. parse ( ) ;
125+ let Ok ( tz) = result else {
126+ return Err ( err ( & result. unwrap_err ( ) . to_string ( ) ) ) ;
127+ } ;
128+
129+ // successfully parsed the timezone name, remove the ' %Z' from the format
130+ format = format. trim_end_matches ( " %Z" ) ;
131+
132+ Some ( tz)
133+ } else if format. contains ( "%Z" ) {
134+ return Err ( err (
135+ "'%Z' is only supported at the end of the format string preceded by a space" ,
136+ ) ) ;
137+ } else {
138+ None
139+ } ;
140+
92141 let mut parsed = Parsed :: new ( ) ;
93- parse ( & mut parsed, s, StrftimeItems :: new ( format) ) . map_err ( |e| err ( & e. to_string ( ) ) ) ?;
142+ parse ( & mut parsed, datetime_str, StrftimeItems :: new ( format) )
143+ . map_err ( |e| err ( & e. to_string ( ) ) ) ?;
94144
95- // attempt to parse the string assuming it has a timezone
96- let dt = parsed. to_datetime ( ) ;
145+ let dt = match tz {
146+ Some ( tz) => {
147+ // A timezone was manually parsed out, convert it to a fixed offset
148+ match parsed. to_datetime_with_timezone ( & tz) {
149+ Ok ( dt) => Ok ( dt. fixed_offset ( ) ) ,
150+ Err ( e) => Err ( e) ,
151+ }
152+ }
153+ // default to parse the string assuming it has a timezone
154+ None => parsed. to_datetime ( ) ,
155+ } ;
97156
98157 if let Err ( e) = & dt {
99158 // no timezone or other failure, try without a timezone
@@ -141,6 +200,7 @@ pub(crate) fn string_to_datetime_formatted<T: TimeZone>(
141200///
142201/// [`chrono::format::strftime`]: https://docs.rs/chrono/latest/chrono/format/strftime/index.html
143202#[ inline]
203+ #[ expect( unused) ]
144204pub ( crate ) fn string_to_timestamp_nanos_formatted (
145205 s : & str ,
146206 format : & str ,
@@ -152,6 +212,20 @@ pub(crate) fn string_to_timestamp_nanos_formatted(
152212 . ok_or_else ( || exec_datafusion_err ! ( "{ERR_NANOSECONDS_NOT_SUPPORTED}" ) )
153213}
154214
215+ pub ( crate ) fn string_to_timestamp_nanos_formatted_with_timezone (
216+ timezone : & Option < Tz > ,
217+ s : & str ,
218+ format : & str ,
219+ ) -> Result < i64 , DataFusionError > {
220+ let dt =
221+ string_to_datetime_formatted ( & timezone. unwrap_or ( "UTC" . parse ( ) ?) , s, format) ?;
222+ let parsed = dt
223+ . timestamp_nanos_opt ( )
224+ . ok_or_else ( || exec_datafusion_err ! ( "{ERR_NANOSECONDS_NOT_SUPPORTED}" ) ) ?;
225+
226+ Ok ( parsed)
227+ }
228+
155229/// Accepts a string with a `chrono` format and converts it to a
156230/// millisecond precision timestamp.
157231///
@@ -176,14 +250,50 @@ pub(crate) fn string_to_timestamp_millis_formatted(s: &str, format: &str) -> Res
176250 . timestamp_millis ( ) )
177251}
178252
179- pub ( crate ) fn handle < O , F , S > (
253+ pub ( crate ) struct ScalarDataType < T : PrimInt > {
254+ data_type : DataType ,
255+ _marker : PhantomData < T > ,
256+ }
257+
258+ impl < T : PrimInt > ScalarDataType < T > {
259+ pub ( crate ) fn new ( dt : DataType ) -> Self {
260+ Self {
261+ data_type : dt,
262+ _marker : PhantomData ,
263+ }
264+ }
265+
266+ fn scalar ( & self , r : Option < i64 > ) -> Result < ScalarValue > {
267+ match & self . data_type {
268+ DataType :: Date32 => Ok ( ScalarValue :: Date32 ( r. and_then ( |v| v. to_i32 ( ) ) ) ) ,
269+ DataType :: Timestamp ( u, tz) => match u {
270+ TimeUnit :: Second => Ok ( ScalarValue :: TimestampSecond ( r, tz. clone ( ) ) ) ,
271+ TimeUnit :: Millisecond => {
272+ Ok ( ScalarValue :: TimestampMillisecond ( r, tz. clone ( ) ) )
273+ }
274+ TimeUnit :: Microsecond => {
275+ Ok ( ScalarValue :: TimestampMicrosecond ( r, tz. clone ( ) ) )
276+ }
277+ TimeUnit :: Nanosecond => {
278+ Ok ( ScalarValue :: TimestampNanosecond ( r, tz. clone ( ) ) )
279+ }
280+ } ,
281+ t => Err ( internal_datafusion_err ! (
282+ "Unsupported data type for ScalarDataType<T>: {t:?}"
283+ ) ) ,
284+ }
285+ }
286+ }
287+
288+ pub ( crate ) fn handle < O , F , T > (
180289 args : & [ ColumnarValue ] ,
181290 op : F ,
182291 name : & str ,
292+ sdt : & ScalarDataType < T > ,
183293) -> Result < ColumnarValue >
184294where
185295 O : ArrowPrimitiveType ,
186- S : ScalarType < O :: Native > ,
296+ T : PrimInt ,
187297 F : Fn ( & str ) -> Result < O :: Native > ,
188298{
189299 match & args[ 0 ] {
@@ -210,8 +320,13 @@ where
210320 } ,
211321 ColumnarValue :: Scalar ( scalar) => match scalar. try_as_str ( ) {
212322 Some ( a) => {
213- let result = a. as_ref ( ) . map ( |x| op ( x) ) . transpose ( ) ?;
214- Ok ( ColumnarValue :: Scalar ( S :: scalar ( result) ) )
323+ let result = a
324+ . as_ref ( )
325+ . map ( |x| op ( x) )
326+ . transpose ( ) ?
327+ . and_then ( |v| v. to_i64 ( ) ) ;
328+ let s = sdt. scalar ( result) ?;
329+ Ok ( ColumnarValue :: Scalar ( s) )
215330 }
216331 _ => exec_err ! ( "Unsupported data type {scalar:?} for function {name}" ) ,
217332 } ,
@@ -221,17 +336,18 @@ where
221336// Given a function that maps a `&str`, `&str` to an arrow native type,
222337// returns a `ColumnarValue` where the function is applied to either a `ArrayRef` or `ScalarValue`
223338// depending on the `args`'s variant.
224- pub ( crate ) fn handle_multiple < O , F , S , M > (
339+ pub ( crate ) fn handle_multiple < O , F , M , T > (
225340 args : & [ ColumnarValue ] ,
226341 op : F ,
227342 op2 : M ,
228343 name : & str ,
344+ sdt : & ScalarDataType < T > ,
229345) -> Result < ColumnarValue >
230346where
231347 O : ArrowPrimitiveType ,
232- S : ScalarType < O :: Native > ,
233348 F : Fn ( & str , & str ) -> Result < O :: Native > ,
234349 M : Fn ( O :: Native ) -> O :: Native ,
350+ T : PrimInt ,
235351{
236352 match & args[ 0 ] {
237353 ColumnarValue :: Array ( a) => match a. data_type ( ) {
@@ -286,9 +402,9 @@ where
286402 if let Some ( s) = x {
287403 match op ( a, s. as_str ( ) ) {
288404 Ok ( r) => {
289- ret = Some ( Ok ( ColumnarValue :: Scalar ( S :: scalar ( Some (
290- op2 ( r ) ,
291- ) ) ) ) ) ;
405+ let result = op2 ( r ) . to_i64 ( ) ;
406+ let s = sdt . scalar ( result ) ? ;
407+ ret = Some ( Ok ( ColumnarValue :: Scalar ( s ) ) ) ;
292408 break ;
293409 }
294410 Err ( e) => ret = Some ( Err ( e) ) ,
0 commit comments