16
16
// under the License.
17
17
18
18
use crate :: error:: { ArrowError , Result } ;
19
- use chrono:: { prelude:: * , LocalResult } ;
19
+ use chrono:: format:: Fixed :: { Nanosecond as FixedNanosecond , TimezoneOffsetColon } ;
20
+ use chrono:: format:: Item :: { Fixed , Literal , Numeric } ;
21
+ use chrono:: format:: Numeric :: Nanosecond ;
22
+ use chrono:: format:: Pad :: Zero ;
23
+ use chrono:: format:: { Item , Parsed } ;
24
+ use chrono:: prelude:: * ;
20
25
21
26
/// Accepts a string in RFC3339 / ISO8601 standard format and some
22
27
/// variants and converts it to a nanosecond precision timestamp.
@@ -78,45 +83,92 @@ pub fn string_to_timestamp_nanos(s: &str) -> Result<i64> {
78
83
// separating the date and time with a space ' ' rather than 'T' to be
79
84
// (more) compatible with Apache Spark SQL
80
85
81
- // timezone offset, using ' ' as a separator
82
- // Example: 2020-09-08 13:42:29.190855-05:00
83
- if let Ok ( ts) = DateTime :: parse_from_str ( s, "%Y-%m-%d %H:%M:%S%.f%:z" ) {
84
- return Ok ( ts. timestamp_nanos ( ) ) ;
86
+ // We parse the date and time prefix first to share work between all the different formats.
87
+ let mut rest = s;
88
+ let mut p;
89
+ let separator_is_space;
90
+ match try_parse_prefix ( & mut rest) {
91
+ Some ( ParsedPrefix {
92
+ result,
93
+ separator_is_space : s,
94
+ } ) => {
95
+ p = result;
96
+ separator_is_space = s;
97
+ }
98
+ None => {
99
+ return Err ( ArrowError :: CastError ( format ! (
100
+ "Error parsing '{}' as timestamp" ,
101
+ s
102
+ ) ) ) ;
103
+ }
85
104
}
86
105
87
- // with an explicit Z, using ' ' as a separator
88
- // Example: 2020-09-08 13:42:29Z
89
- if let Ok ( ts) = Utc . datetime_from_str ( s, "%Y-%m-%d %H:%M:%S%.fZ" ) {
90
- return Ok ( ts. timestamp_nanos ( ) ) ;
91
- }
106
+ if separator_is_space {
107
+ // timezone offset, using ' ' as a separator
108
+ // Example: 2020-09-08 13:42:29.190855-05:00
109
+ // Full format string: "%Y-%m-%d %H:%M:%S%.f%:z".
110
+ const FORMAT1 : [ Item ; 2 ] = [ Fixed ( FixedNanosecond ) , Fixed ( TimezoneOffsetColon ) ] ;
111
+ if let Ok ( ts) = chrono:: format:: parse ( & mut p, rest, FORMAT1 . iter ( ) )
112
+ . and_then ( |( ) | p. to_datetime ( ) )
113
+ {
114
+ return Ok ( ts. timestamp_nanos ( ) ) ;
115
+ }
92
116
93
- // Support timestamps without an explicit timezone offset, again
94
- // to be compatible with what Apache Spark SQL does.
117
+ // with an explicit Z, using ' ' as a separator
118
+ // Example: 2020-09-08 13:42:29Z
119
+ // Full format string: "%Y-%m-%d %H:%M:%S%.fZ".
120
+ const FORMAT2 : [ Item ; 2 ] = [ Fixed ( FixedNanosecond ) , Literal ( "Z" ) ] ;
121
+ if let Ok ( ts) = chrono:: format:: parse ( & mut p, rest, FORMAT2 . iter ( ) )
122
+ . and_then ( |( ) | p. to_datetime_with_timezone ( & Utc ) )
123
+ {
124
+ return Ok ( ts. timestamp_nanos ( ) ) ;
125
+ }
95
126
96
- // without a timezone specifier as a local time, using T as a separator
97
- // Example: 2020-09-08T13:42:29.190855
98
- if let Ok ( ts) = NaiveDateTime :: parse_from_str ( s, "%Y-%m-%dT%H:%M:%S.%f" ) {
99
- return naive_datetime_to_timestamp ( s, ts) ;
100
- }
127
+ // without a timezone specifier as a local time, using ' ' as a separator
128
+ // Example: 2020-09-08 13:42:29.190855
129
+ const FORMAT5 : [ Item ; 2 ] = [ Literal ( "." ) , Numeric ( Nanosecond , Zero ) ] ;
130
+ // Full format string: "%Y-%m-%d %H:%M:%S.%f".
131
+ if let Ok ( ts) = chrono:: format:: parse ( & mut p, rest, FORMAT5 . iter ( ) )
132
+ . and_then ( |( ) | p. to_naive_datetime_with_offset ( 0 ) )
133
+ {
134
+ return naive_datetime_to_timestamp ( s, ts) ;
135
+ }
101
136
102
- // without a timezone specifier as a local time, using T as a
103
- // separator, no fractional seconds
104
- // Example: 2020-09-08T13:42:29
105
- if let Ok ( ts) = NaiveDateTime :: parse_from_str ( s, "%Y-%m-%dT%H:%M:%S" ) {
106
- return naive_datetime_to_timestamp ( s, ts) ;
137
+ // without a timezone specifier as a local time, using ' ' as a
138
+ // separator, no fractional seconds
139
+ // Example: 2020-09-08 13:42:29
140
+ // Full format string: "%Y-%m-%d %H:%M:%S".
141
+ if rest. is_empty ( ) {
142
+ if let Ok ( ts) = p. to_naive_datetime_with_offset ( 0 ) {
143
+ return naive_datetime_to_timestamp ( s, ts) ;
144
+ }
145
+ }
107
146
}
108
147
109
- // without a timezone specifier as a local time, using ' ' as a separator
110
- // Example: 2020-09-08 13:42:29.190855
111
- if let Ok ( ts) = NaiveDateTime :: parse_from_str ( s, "%Y-%m-%d %H:%M:%S.%f" ) {
112
- return naive_datetime_to_timestamp ( s, ts) ;
113
- }
148
+ // Support timestamps without an explicit timezone offset, again
149
+ // to be compatible with what Apache Spark SQL does.
150
+ if !separator_is_space
151
+ /* i.e. separator == b'T' */
152
+ {
153
+ // without a timezone specifier as a local time, using T as a separator
154
+ // Example: 2020-09-08T13:42:29.190855
155
+ // Full format string: "%Y-%m-%dT%H:%M:%S.%f".
156
+ const FORMAT3 : [ Item ; 2 ] = [ Literal ( "." ) , Numeric ( Nanosecond , Zero ) ] ;
157
+ if let Ok ( ts) = chrono:: format:: parse ( & mut p, rest, FORMAT3 . iter ( ) )
158
+ . and_then ( |( ) | p. to_naive_datetime_with_offset ( 0 ) )
159
+ {
160
+ return naive_datetime_to_timestamp ( s, ts) ;
161
+ }
114
162
115
- // without a timezone specifier as a local time, using ' ' as a
116
- // separator, no fractional seconds
117
- // Example: 2020-09-08 13:42:29
118
- if let Ok ( ts) = NaiveDateTime :: parse_from_str ( s, "%Y-%m-%d %H:%M:%S" ) {
119
- return naive_datetime_to_timestamp ( s, ts) ;
163
+ // without a timezone specifier as a local time, using T as a
164
+ // separator, no fractional seconds
165
+ // Example: 2020-09-08T13:42:29
166
+ // Full format string: "%Y-%m-%dT%H:%M:%S".
167
+ if rest. is_empty ( ) {
168
+ if let Ok ( ts) = p. to_naive_datetime_with_offset ( 0 ) {
169
+ return naive_datetime_to_timestamp ( s, ts) ;
170
+ }
171
+ }
120
172
}
121
173
122
174
// Note we don't pass along the error message from the underlying
@@ -130,33 +182,97 @@ pub fn string_to_timestamp_nanos(s: &str) -> Result<i64> {
130
182
) ) )
131
183
}
132
184
185
+ /// Parses YYYY-MM-DD(T| )HH:MM:SS.
186
+ fn try_parse_prefix ( s : & mut & str ) -> Option < ParsedPrefix > {
187
+ let mut p = Parsed :: new ( ) ;
188
+
189
+ let mut rest = s. as_bytes ( ) ;
190
+ let year = try_parse_num ( & mut rest) ?;
191
+ try_consume ( & mut rest, b'-' ) ?;
192
+ let month = try_parse_num ( & mut rest) ?;
193
+ try_consume ( & mut rest, b'-' ) ?;
194
+ let day = try_parse_num ( & mut rest) ?;
195
+ if rest. is_empty ( ) {
196
+ return None ;
197
+ }
198
+
199
+ let separator_is_space;
200
+ match rest[ 0 ] {
201
+ b' ' => separator_is_space = true ,
202
+ b'T' => separator_is_space = false ,
203
+ _ => return None ,
204
+ }
205
+
206
+ rest = & rest[ 1 ..] ;
207
+ let hour = try_parse_num ( & mut rest) ?;
208
+ try_consume ( & mut rest, b':' ) ?;
209
+ let minute = try_parse_num ( & mut rest) ?;
210
+ try_consume ( & mut rest, b':' ) ?;
211
+ let second = try_parse_num ( & mut rest) ?;
212
+
213
+ p. set_year ( year) . ok ( ) ?;
214
+ p. set_month ( month) . ok ( ) ?;
215
+ p. set_day ( day) . ok ( ) ?;
216
+ p. set_hour ( hour) . ok ( ) ?;
217
+ p. set_minute ( minute) . ok ( ) ?;
218
+ p. set_second ( second) . ok ( ) ?;
219
+
220
+ * s = unsafe { std:: str:: from_utf8_unchecked ( rest) } ;
221
+ Some ( ParsedPrefix {
222
+ result : p,
223
+ separator_is_space,
224
+ } )
225
+ }
226
+
227
+ #[ must_use]
228
+ fn try_parse_num ( s : & mut & [ u8 ] ) -> Option < i64 > {
229
+ if s. is_empty ( ) {
230
+ return None ;
231
+ }
232
+
233
+ let mut i;
234
+ if s[ 0 ] == b'-' {
235
+ i = 1
236
+ } else {
237
+ i = 0 ;
238
+ }
239
+
240
+ while i < s. len ( ) && b'0' <= s[ i] && s[ i] <= b'9' {
241
+ i += 1
242
+ }
243
+
244
+ let res = unsafe { std:: str:: from_utf8_unchecked ( & s[ 0 ..i] ) }
245
+ . parse ( )
246
+ . ok ( ) ;
247
+ * s = & s[ i..] ;
248
+ return res;
249
+ }
250
+
251
+ #[ must_use]
252
+ fn try_consume ( s : & mut & [ u8 ] , c : u8 ) -> Option < ( ) > {
253
+ if s. is_empty ( ) || s[ 0 ] != c {
254
+ return None ;
255
+ }
256
+ * s = & s[ 1 ..] ;
257
+ Some ( ( ) )
258
+ }
259
+
260
+ struct ParsedPrefix {
261
+ result : Parsed ,
262
+ separator_is_space : bool , // When false, the separator is 'T'.
263
+ }
264
+
133
265
/// Converts the naive datetime (which has no specific timezone) to a
134
266
/// nanosecond epoch timestamp relative to UTC.
135
- fn naive_datetime_to_timestamp ( s : & str , datetime : NaiveDateTime ) -> Result < i64 > {
136
- let l = Local { } ;
137
-
138
- match l. from_local_datetime ( & datetime) {
139
- LocalResult :: None => Err ( ArrowError :: CastError ( format ! (
140
- "Error parsing '{}' as timestamp: local time representation is invalid" ,
141
- s
142
- ) ) ) ,
143
- LocalResult :: Single ( local_datetime) => {
144
- Ok ( local_datetime. with_timezone ( & Utc ) . timestamp_nanos ( ) )
145
- }
146
- // Ambiguous times can happen if the timestamp is exactly when
147
- // a daylight savings time transition occurs, for example, and
148
- // so the datetime could validly be said to be in two
149
- // potential offsets. However, since we are about to convert
150
- // to UTC anyways, we can pick one arbitrarily
151
- LocalResult :: Ambiguous ( local_datetime, _) => {
152
- Ok ( local_datetime. with_timezone ( & Utc ) . timestamp_nanos ( ) )
153
- }
154
- }
267
+ fn naive_datetime_to_timestamp ( _s : & str , datetime : NaiveDateTime ) -> Result < i64 > {
268
+ // CubeStore-specific: do not take timezones into account.
269
+ Ok ( Utc . from_utc_datetime ( & datetime) . timestamp_nanos ( ) )
155
270
}
156
271
157
272
#[ cfg( test) ]
158
273
mod tests {
159
274
use super :: * ;
275
+ use chrono:: LocalResult ;
160
276
161
277
#[ test]
162
278
fn string_to_timestamp_timezone ( ) -> Result < ( ) > {
@@ -220,6 +336,7 @@ mod tests {
220
336
}
221
337
222
338
#[ test]
339
+ #[ ignore = "CubeStore always uses UTC" ]
223
340
#[ cfg_attr( miri, ignore) ] // unsupported operation: can't call foreign function: mktime
224
341
fn string_to_timestamp_no_timezone ( ) -> Result < ( ) > {
225
342
// This test is designed to succeed in regardless of the local
0 commit comments