Skip to content

Commit 2b331f0

Browse files
committed
perf_string_to_int
1 parent ca90587 commit 2b331f0

File tree

1 file changed

+11
-39
lines changed
  • native/spark-expr/src/conversion_funcs

1 file changed

+11
-39
lines changed

native/spark-expr/src/conversion_funcs/cast.rs

Lines changed: 11 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -389,7 +389,6 @@ macro_rules! cast_utf8_to_int {
389389
($array:expr, $eval_mode:expr, $array_type:ty, $cast_method:ident) => {{
390390
let len = $array.len();
391391
let mut cast_array = PrimitiveArray::<$array_type>::builder(len);
392-
393392
if $array.null_count() == 0 {
394393
for i in 0..len {
395394
if let Some(cast_value) = $cast_method($array.value(i), $eval_mode)? {
@@ -409,12 +408,10 @@ macro_rules! cast_utf8_to_int {
409408
}
410409
}
411410
}
412-
413411
let result: SparkResult<ArrayRef> = Ok(Arc::new(cast_array.finish()) as ArrayRef);
414412
result
415413
}};
416414
}
417-
418415
macro_rules! cast_utf8_to_timestamp {
419416
($array:expr, $eval_mode:expr, $array_type:ty, $cast_method:ident, $tz:expr) => {{
420417
let len = $array.len();
@@ -1944,35 +1941,6 @@ fn cast_string_to_i16(str: &str, eval_mode: EvalMode) -> SparkResult<Option<i16>
19441941

19451942
/// Equivalent to org.apache.spark.unsafe.types.UTF8String.toInt(IntWrapper intWrapper)
19461943
fn cast_string_to_i32(str: &str, eval_mode: EvalMode) -> SparkResult<Option<i32>> {
1947-
// happy path
1948-
let bytes = str.as_bytes();
1949-
let len = bytes.len();
1950-
if len > 0 && len <= 10 {
1951-
// SAFETY: We checked len > 0 above
1952-
let first = unsafe { *bytes.get_unchecked(0) };
1953-
// Must start with digit for happy path
1954-
if first >= b'0' && first <= b'9' {
1955-
let mut result: i64 = (first - b'0') as i64;
1956-
let mut i = 1;
1957-
1958-
// Try to parse remaining digits
1959-
while i < len {
1960-
let b = bytes[i];
1961-
if b >= b'0' && b <= b'9' {
1962-
result = result * 10 + (b - b'0') as i64;
1963-
i += 1;
1964-
} else {
1965-
// Hit non-digit (space, sign, decimal, etc.) - Bail to slow path
1966-
break;
1967-
}
1968-
}
1969-
if i == len && result <= i32::MAX as i64 {
1970-
return Ok(Some(result as i32));
1971-
}
1972-
// Otherwise fall through to slow path
1973-
}
1974-
}
1975-
19761944
do_cast_string_to_int::<i32>(str, eval_mode, "INT", i32::MIN)
19771945
}
19781946

@@ -2051,12 +2019,11 @@ fn do_cast_string_to_int<
20512019
}
20522020
}
20532021

2054-
if !ch.is_ascii_digit() {
2022+
let digit = if ch.is_ascii_digit() {
2023+
(ch as u32) - ('0' as u32)
2024+
} else {
20552025
return none_or_err(eval_mode, type_name, str);
2056-
}
2057-
let digit = T::from((ch - b'0') as i32);
2058-
result = (result << 3) + (result << 1) - digit;
2059-
result = result * radix - digit;
2026+
};
20602027

20612028
// We are going to process the new digit and accumulate the result. However, before
20622029
// doing this, if the result is already smaller than the
@@ -2068,8 +2035,13 @@ fn do_cast_string_to_int<
20682035
// Since the previous result is greater than or equal to stopValue(Integer.MIN_VALUE /
20692036
// radix), we can just use `result > 0` to check overflow. If result
20702037
// overflows, we should stop
2071-
if result > T::zero() {
2072-
return none_or_err(eval_mode, type_name, str);
2038+
let v = result * radix;
2039+
let digit = (digit as i32).into();
2040+
match v.checked_sub(&digit) {
2041+
Some(x) if x <= T::zero() => result = x,
2042+
_ => {
2043+
return none_or_err(eval_mode, type_name, str);
2044+
}
20732045
}
20742046
} else {
20752047
// make sure fractional digits are valid digits but ignore them

0 commit comments

Comments
 (0)