Skip to content

Commit d85aaf0

Browse files
committed
fix: use built-in checked arithmetic (ckd_mul/ckd_add from <stdckdint.h>) for integer overflow detection
1 parent 280b55e commit d85aaf0

File tree

1 file changed

+26
-33
lines changed

1 file changed

+26
-33
lines changed

pandas/_libs/src/parser/tokenizer.c

Lines changed: 26 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ GitHub. See Python Software Foundation License and BSD licenses for these.
2424
#include <limits.h>
2525
#include <math.h>
2626
#include <stdbool.h>
27+
#include <stdckdint.h>
2728
#include <stddef.h>
2829
#include <stdlib.h>
2930

@@ -1898,37 +1899,6 @@ static int power_int(int base, int exponent) {
18981899
return result * base;
18991900
}
19001901

1901-
static inline int64_t add_int_check_overflow(int64_t lhs, int64_t rhs,
1902-
int64_t mul_lhs) {
1903-
// rhs will always be positive, because this function
1904-
// only executes after the first parse, hence the sign will always go to lhs.
1905-
// if lhs > 0:
1906-
// Will overflow if (mul_lhs * lhs) + rhs > INT_MAX
1907-
// iff lhs > (INT_MAX - rhs) / mul_lhs
1908-
// if lhs < 0:
1909-
// Will underflow if (mul_lhs * lhs) - rhs < INT_MIN
1910-
// iff lhs < (INT_MIN + rhs) / mul_lhs
1911-
if (lhs >= 0) {
1912-
if (lhs > (INT_MAX - rhs) / mul_lhs) {
1913-
errno = ERANGE;
1914-
}
1915-
} else {
1916-
if (lhs < (INT_MIN + rhs) / mul_lhs) {
1917-
errno = ERANGE;
1918-
}
1919-
rhs = -rhs;
1920-
}
1921-
return lhs * mul_lhs + rhs;
1922-
}
1923-
1924-
static inline uint64_t add_uint_check_overflow(uint64_t lhs, uint64_t rhs,
1925-
uint64_t mul_lhs) {
1926-
if (lhs > (UINT_MAX - rhs) / mul_lhs) {
1927-
errno = ERANGE;
1928-
}
1929-
return lhs * mul_lhs + rhs;
1930-
}
1931-
19321902
int64_t str_to_int64(const char *p_item, int64_t int_min, int64_t int_max,
19331903
int *error, char tsep) {
19341904
if (!p_item || *p_item == '\0') {
@@ -1948,6 +1918,7 @@ int64_t str_to_int64(const char *p_item, int64_t int_min, int64_t int_max,
19481918
errno = 0;
19491919
char *endptr = NULL;
19501920
int64_t result = strtoll(p_item, &endptr, 10);
1921+
bool is_negative = result < 0;
19511922

19521923
while (errno == 0 && tsep != '\0' && *endptr == tsep) {
19531924
// Skip multiple consecutive tsep
@@ -1957,9 +1928,22 @@ int64_t str_to_int64(const char *p_item, int64_t int_min, int64_t int_max,
19571928

19581929
char *new_end = NULL;
19591930
int64_t next_part = strtoll(endptr, &new_end, 10);
1931+
if (is_negative) {
1932+
next_part = -next_part;
1933+
}
1934+
19601935
ptrdiff_t digits = new_end - endptr;
19611936
int64_t mul_result = power_int(10, (int)digits);
1962-
result = add_int_check_overflow(result, next_part, mul_result);
1937+
// result * mul_result
1938+
if (ckd_mul(&result, result, mul_result)) {
1939+
// overflow
1940+
errno = ERANGE;
1941+
}
1942+
// result + next_part
1943+
if (ckd_add(&result, result, next_part)) {
1944+
// overflow
1945+
errno = ERANGE;
1946+
}
19631947
endptr = new_end;
19641948
}
19651949

@@ -2017,7 +2001,16 @@ uint64_t str_to_uint64(uint_state *state, const char *p_item, int64_t int_max,
20172001
uint64_t next_part = strtoull(endptr, &new_end, 10);
20182002
ptrdiff_t digits = new_end - endptr;
20192003
uint64_t mul_result = power_int(10, (int)digits);
2020-
result = add_uint_check_overflow(result, next_part, mul_result);
2004+
// result * mul_result
2005+
if (ckd_mul(&result, result, mul_result)) {
2006+
// overflow
2007+
errno = ERANGE;
2008+
}
2009+
// result + next_part
2010+
if (ckd_add(&result, result, next_part)) {
2011+
// overflow
2012+
errno = ERANGE;
2013+
}
20212014
endptr = new_end;
20222015
}
20232016

0 commit comments

Comments
 (0)