40
40
mod decimal;
41
41
mod dictionary;
42
42
mod list;
43
+ mod string;
43
44
use crate :: cast:: decimal:: * ;
44
45
use crate :: cast:: dictionary:: * ;
45
46
use crate :: cast:: list:: * ;
47
+ use crate :: cast:: string:: * ;
46
48
47
49
use chrono:: { NaiveTime , Offset , TimeZone , Utc } ;
48
50
use std:: cmp:: Ordering ;
@@ -2001,26 +2003,6 @@ where
2001
2003
from. unary_opt :: < _ , R > ( num:: cast:: cast :: < T :: Native , R :: Native > )
2002
2004
}
2003
2005
2004
- fn value_to_string < O : OffsetSizeTrait > (
2005
- array : & dyn Array ,
2006
- options : & CastOptions ,
2007
- ) -> Result < ArrayRef , ArrowError > {
2008
- let mut builder = GenericStringBuilder :: < O > :: new ( ) ;
2009
- let formatter = ArrayFormatter :: try_new ( array, & options. format_options ) ?;
2010
- let nulls = array. nulls ( ) ;
2011
- for i in 0 ..array. len ( ) {
2012
- match nulls. map ( |x| x. is_null ( i) ) . unwrap_or_default ( ) {
2013
- true => builder. append_null ( ) ,
2014
- false => {
2015
- formatter. value ( i) . write ( & mut builder) ?;
2016
- // tell the builder the row is finished
2017
- builder. append_value ( "" ) ;
2018
- }
2019
- }
2020
- }
2021
- Ok ( Arc :: new ( builder. finish ( ) ) )
2022
- }
2023
-
2024
2006
fn cast_numeric_to_binary < FROM : ArrowPrimitiveType , O : OffsetSizeTrait > (
2025
2007
array : & dyn Array ,
2026
2008
) -> Result < ArrayRef , ArrowError > {
@@ -2034,172 +2016,6 @@ fn cast_numeric_to_binary<FROM: ArrowPrimitiveType, O: OffsetSizeTrait>(
2034
2016
) ) )
2035
2017
}
2036
2018
2037
- /// Parse UTF-8
2038
- fn parse_string < P : Parser , O : OffsetSizeTrait > (
2039
- array : & dyn Array ,
2040
- cast_options : & CastOptions ,
2041
- ) -> Result < ArrayRef , ArrowError > {
2042
- let string_array = array. as_string :: < O > ( ) ;
2043
- let array = if cast_options. safe {
2044
- let iter = string_array. iter ( ) . map ( |x| x. and_then ( P :: parse) ) ;
2045
-
2046
- // Benefit:
2047
- // 20% performance improvement
2048
- // Soundness:
2049
- // The iterator is trustedLen because it comes from an `StringArray`.
2050
- unsafe { PrimitiveArray :: < P > :: from_trusted_len_iter ( iter) }
2051
- } else {
2052
- let v = string_array
2053
- . iter ( )
2054
- . map ( |x| match x {
2055
- Some ( v) => P :: parse ( v) . ok_or_else ( || {
2056
- ArrowError :: CastError ( format ! (
2057
- "Cannot cast string '{}' to value of {:?} type" ,
2058
- v,
2059
- P :: DATA_TYPE
2060
- ) )
2061
- } ) ,
2062
- None => Ok ( P :: Native :: default ( ) ) ,
2063
- } )
2064
- . collect :: < Result < Vec < _ > , ArrowError > > ( ) ?;
2065
- PrimitiveArray :: new ( v. into ( ) , string_array. nulls ( ) . cloned ( ) )
2066
- } ;
2067
-
2068
- Ok ( Arc :: new ( array) as ArrayRef )
2069
- }
2070
-
2071
- /// Casts generic string arrays to an ArrowTimestampType (TimeStampNanosecondArray, etc.)
2072
- fn cast_string_to_timestamp < O : OffsetSizeTrait , T : ArrowTimestampType > (
2073
- array : & dyn Array ,
2074
- to_tz : & Option < Arc < str > > ,
2075
- cast_options : & CastOptions ,
2076
- ) -> Result < ArrayRef , ArrowError > {
2077
- let array = array. as_string :: < O > ( ) ;
2078
- let out: PrimitiveArray < T > = match to_tz {
2079
- Some ( tz) => {
2080
- let tz: Tz = tz. as_ref ( ) . parse ( ) ?;
2081
- cast_string_to_timestamp_impl ( array, & tz, cast_options) ?
2082
- }
2083
- None => cast_string_to_timestamp_impl ( array, & Utc , cast_options) ?,
2084
- } ;
2085
- Ok ( Arc :: new ( out. with_timezone_opt ( to_tz. clone ( ) ) ) )
2086
- }
2087
-
2088
- fn cast_string_to_timestamp_impl < O : OffsetSizeTrait , T : ArrowTimestampType , Tz : TimeZone > (
2089
- array : & GenericStringArray < O > ,
2090
- tz : & Tz ,
2091
- cast_options : & CastOptions ,
2092
- ) -> Result < PrimitiveArray < T > , ArrowError > {
2093
- if cast_options. safe {
2094
- let iter = array. iter ( ) . map ( |v| {
2095
- v. and_then ( |v| {
2096
- let naive = string_to_datetime ( tz, v) . ok ( ) ?. naive_utc ( ) ;
2097
- T :: make_value ( naive)
2098
- } )
2099
- } ) ;
2100
- // Benefit:
2101
- // 20% performance improvement
2102
- // Soundness:
2103
- // The iterator is trustedLen because it comes from an `StringArray`.
2104
-
2105
- Ok ( unsafe { PrimitiveArray :: from_trusted_len_iter ( iter) } )
2106
- } else {
2107
- let vec = array
2108
- . iter ( )
2109
- . map ( |v| {
2110
- v. map ( |v| {
2111
- let naive = string_to_datetime ( tz, v) ?. naive_utc ( ) ;
2112
- T :: make_value ( naive) . ok_or_else ( || {
2113
- ArrowError :: CastError ( format ! (
2114
- "Overflow converting {naive} to {:?}" ,
2115
- T :: UNIT
2116
- ) )
2117
- } )
2118
- } )
2119
- . transpose ( )
2120
- } )
2121
- . collect :: < Result < Vec < Option < i64 > > , _ > > ( ) ?;
2122
-
2123
- // Benefit:
2124
- // 20% performance improvement
2125
- // Soundness:
2126
- // The iterator is trustedLen because it comes from an `StringArray`.
2127
- Ok ( unsafe { PrimitiveArray :: from_trusted_len_iter ( vec. iter ( ) ) } )
2128
- }
2129
- }
2130
-
2131
- fn cast_string_to_interval < Offset , F , ArrowType > (
2132
- array : & dyn Array ,
2133
- cast_options : & CastOptions ,
2134
- parse_function : F ,
2135
- ) -> Result < ArrayRef , ArrowError >
2136
- where
2137
- Offset : OffsetSizeTrait ,
2138
- ArrowType : ArrowPrimitiveType ,
2139
- F : Fn ( & str ) -> Result < ArrowType :: Native , ArrowError > + Copy ,
2140
- {
2141
- let string_array = array
2142
- . as_any ( )
2143
- . downcast_ref :: < GenericStringArray < Offset > > ( )
2144
- . unwrap ( ) ;
2145
- let interval_array = if cast_options. safe {
2146
- let iter = string_array
2147
- . iter ( )
2148
- . map ( |v| v. and_then ( |v| parse_function ( v) . ok ( ) ) ) ;
2149
-
2150
- // Benefit:
2151
- // 20% performance improvement
2152
- // Soundness:
2153
- // The iterator is trustedLen because it comes from an `StringArray`.
2154
- unsafe { PrimitiveArray :: < ArrowType > :: from_trusted_len_iter ( iter) }
2155
- } else {
2156
- let vec = string_array
2157
- . iter ( )
2158
- . map ( |v| v. map ( parse_function) . transpose ( ) )
2159
- . collect :: < Result < Vec < _ > , ArrowError > > ( ) ?;
2160
-
2161
- // Benefit:
2162
- // 20% performance improvement
2163
- // Soundness:
2164
- // The iterator is trustedLen because it comes from an `StringArray`.
2165
- unsafe { PrimitiveArray :: < ArrowType > :: from_trusted_len_iter ( vec) }
2166
- } ;
2167
- Ok ( Arc :: new ( interval_array) as ArrayRef )
2168
- }
2169
-
2170
- fn cast_string_to_year_month_interval < Offset : OffsetSizeTrait > (
2171
- array : & dyn Array ,
2172
- cast_options : & CastOptions ,
2173
- ) -> Result < ArrayRef , ArrowError > {
2174
- cast_string_to_interval :: < Offset , _ , IntervalYearMonthType > (
2175
- array,
2176
- cast_options,
2177
- parse_interval_year_month,
2178
- )
2179
- }
2180
-
2181
- fn cast_string_to_day_time_interval < Offset : OffsetSizeTrait > (
2182
- array : & dyn Array ,
2183
- cast_options : & CastOptions ,
2184
- ) -> Result < ArrayRef , ArrowError > {
2185
- cast_string_to_interval :: < Offset , _ , IntervalDayTimeType > (
2186
- array,
2187
- cast_options,
2188
- parse_interval_day_time,
2189
- )
2190
- }
2191
-
2192
- fn cast_string_to_month_day_nano_interval < Offset : OffsetSizeTrait > (
2193
- array : & dyn Array ,
2194
- cast_options : & CastOptions ,
2195
- ) -> Result < ArrayRef , ArrowError > {
2196
- cast_string_to_interval :: < Offset , _ , IntervalMonthDayNanoType > (
2197
- array,
2198
- cast_options,
2199
- parse_interval_month_day_nano,
2200
- )
2201
- }
2202
-
2203
2019
fn adjust_timestamp_to_timezone < T : ArrowTimestampType > (
2204
2020
array : PrimitiveArray < Int64Type > ,
2205
2021
to_tz : & Tz ,
@@ -2222,41 +2038,6 @@ fn adjust_timestamp_to_timezone<T: ArrowTimestampType>(
2222
2038
Ok ( adjusted)
2223
2039
}
2224
2040
2225
- /// Casts Utf8 to Boolean
2226
- fn cast_utf8_to_boolean < OffsetSize > (
2227
- from : & dyn Array ,
2228
- cast_options : & CastOptions ,
2229
- ) -> Result < ArrayRef , ArrowError >
2230
- where
2231
- OffsetSize : OffsetSizeTrait ,
2232
- {
2233
- let array = from
2234
- . as_any ( )
2235
- . downcast_ref :: < GenericStringArray < OffsetSize > > ( )
2236
- . unwrap ( ) ;
2237
-
2238
- let output_array = array
2239
- . iter ( )
2240
- . map ( |value| match value {
2241
- Some ( value) => match value. to_ascii_lowercase ( ) . trim ( ) {
2242
- "t" | "tr" | "tru" | "true" | "y" | "ye" | "yes" | "on" | "1" => Ok ( Some ( true ) ) ,
2243
- "f" | "fa" | "fal" | "fals" | "false" | "n" | "no" | "of" | "off" | "0" => {
2244
- Ok ( Some ( false ) )
2245
- }
2246
- invalid_value => match cast_options. safe {
2247
- true => Ok ( None ) ,
2248
- false => Err ( ArrowError :: CastError ( format ! (
2249
- "Cannot cast value '{invalid_value}' to value of Boolean type" ,
2250
- ) ) ) ,
2251
- } ,
2252
- } ,
2253
- None => Ok ( None ) ,
2254
- } )
2255
- . collect :: < Result < BooleanArray , _ > > ( ) ?;
2256
-
2257
- Ok ( Arc :: new ( output_array) )
2258
- }
2259
-
2260
2041
/// Cast numeric types to Boolean
2261
2042
///
2262
2043
/// Any zero value returns `false` while non-zero returns `true`
@@ -2325,37 +2106,6 @@ where
2325
2106
unsafe { PrimitiveArray :: < T > :: from_trusted_len_iter ( iter) }
2326
2107
}
2327
2108
2328
- /// A specified helper to cast from `GenericBinaryArray` to `GenericStringArray` when they have same
2329
- /// offset size so re-encoding offset is unnecessary.
2330
- fn cast_binary_to_string < O : OffsetSizeTrait > (
2331
- array : & dyn Array ,
2332
- cast_options : & CastOptions ,
2333
- ) -> Result < ArrayRef , ArrowError > {
2334
- let array = array
2335
- . as_any ( )
2336
- . downcast_ref :: < GenericByteArray < GenericBinaryType < O > > > ( )
2337
- . unwrap ( ) ;
2338
-
2339
- match GenericStringArray :: < O > :: try_from_binary ( array. clone ( ) ) {
2340
- Ok ( a) => Ok ( Arc :: new ( a) ) ,
2341
- Err ( e) => match cast_options. safe {
2342
- true => {
2343
- // Fallback to slow method to convert invalid sequences to nulls
2344
- let mut builder =
2345
- GenericStringBuilder :: < O > :: with_capacity ( array. len ( ) , array. value_data ( ) . len ( ) ) ;
2346
-
2347
- let iter = array
2348
- . iter ( )
2349
- . map ( |v| v. and_then ( |v| std:: str:: from_utf8 ( v) . ok ( ) ) ) ;
2350
-
2351
- builder. extend ( iter) ;
2352
- Ok ( Arc :: new ( builder. finish ( ) ) )
2353
- }
2354
- false => Err ( e) ,
2355
- } ,
2356
- }
2357
- }
2358
-
2359
2109
/// Helper function to cast from one `BinaryArray` or 'LargeBinaryArray' to 'FixedSizeBinaryArray'.
2360
2110
fn cast_binary_to_fixed_size_binary < O : OffsetSizeTrait > (
2361
2111
array : & dyn Array ,
0 commit comments