@@ -389,7 +389,6 @@ macro_rules! cast_utf8_to_int {
389389 ( $array: expr, $eval_mode: expr, $array_type: ty, $cast_method: ident) => { {
390390 let len = $array. len( ) ;
391391 let mut cast_array = PrimitiveArray :: <$array_type>:: builder( len) ;
392-
393392 if $array. null_count( ) == 0 {
394393 for i in 0 ..len {
395394 if let Some ( cast_value) = $cast_method( $array. value( i) , $eval_mode) ? {
@@ -409,12 +408,10 @@ macro_rules! cast_utf8_to_int {
409408 }
410409 }
411410 }
412-
413411 let result: SparkResult <ArrayRef > = Ok ( Arc :: new( cast_array. finish( ) ) as ArrayRef ) ;
414412 result
415413 } } ;
416414}
417-
418415macro_rules! cast_utf8_to_timestamp {
419416 ( $array: expr, $eval_mode: expr, $array_type: ty, $cast_method: ident, $tz: expr) => { {
420417 let len = $array. len( ) ;
@@ -1944,35 +1941,6 @@ fn cast_string_to_i16(str: &str, eval_mode: EvalMode) -> SparkResult<Option<i16>
19441941
19451942/// Equivalent to org.apache.spark.unsafe.types.UTF8String.toInt(IntWrapper intWrapper)
19461943fn cast_string_to_i32 ( str : & str , eval_mode : EvalMode ) -> SparkResult < Option < i32 > > {
1947- // happy path
1948- let bytes = str. as_bytes ( ) ;
1949- let len = bytes. len ( ) ;
1950- if len > 0 && len <= 10 {
1951- // SAFETY: We checked len > 0 above
1952- let first = unsafe { * bytes. get_unchecked ( 0 ) } ;
1953- // Must start with digit for happy path
1954- if first >= b'0' && first <= b'9' {
1955- let mut result: i64 = ( first - b'0' ) as i64 ;
1956- let mut i = 1 ;
1957-
1958- // Try to parse remaining digits
1959- while i < len {
1960- let b = bytes[ i] ;
1961- if b >= b'0' && b <= b'9' {
1962- result = result * 10 + ( b - b'0' ) as i64 ;
1963- i += 1 ;
1964- } else {
1965- // Hit non-digit (space, sign, decimal, etc.) - Bail to slow path
1966- break ;
1967- }
1968- }
1969- if i == len && result <= i32:: MAX as i64 {
1970- return Ok ( Some ( result as i32 ) ) ;
1971- }
1972- // Otherwise fall through to slow path
1973- }
1974- }
1975-
19761944 do_cast_string_to_int :: < i32 > ( str, eval_mode, "INT" , i32:: MIN )
19771945}
19781946
@@ -2051,12 +2019,11 @@ fn do_cast_string_to_int<
20512019 }
20522020 }
20532021
2054- if !ch. is_ascii_digit ( ) {
2022+ let digit = if ch. is_ascii_digit ( ) {
2023+ ( ch as u32 ) - ( '0' as u32 )
2024+ } else {
20552025 return none_or_err ( eval_mode, type_name, str) ;
2056- }
2057- let digit = T :: from ( ( ch - b'0' ) as i32 ) ;
2058- result = ( result << 3 ) + ( result << 1 ) - digit;
2059- result = result * radix - digit;
2026+ } ;
20602027
20612028 // We are going to process the new digit and accumulate the result. However, before
20622029 // doing this, if the result is already smaller than the
@@ -2068,8 +2035,13 @@ fn do_cast_string_to_int<
20682035 // Since the previous result is greater than or equal to stopValue(Integer.MIN_VALUE /
20692036 // radix), we can just use `result > 0` to check overflow. If result
20702037 // overflows, we should stop
2071- if result > T :: zero ( ) {
2072- return none_or_err ( eval_mode, type_name, str) ;
2038+ let v = result * radix;
2039+ let digit = ( digit as i32 ) . into ( ) ;
2040+ match v. checked_sub ( & digit) {
2041+ Some ( x) if x <= T :: zero ( ) => result = x,
2042+ _ => {
2043+ return none_or_err ( eval_mode, type_name, str) ;
2044+ }
20732045 }
20742046 } else {
20752047 // make sure fractional digits are valid digits but ignore them
0 commit comments