Skip to content

Commit 0d61b8d

Browse files
committed
fix(parser): integer overflow reads as PyLongObject
1 parent b20d6ab commit 0d61b8d

File tree

1 file changed

+33
-3
lines changed

1 file changed

+33
-3
lines changed

pandas/_libs/parsers.pyx

Lines changed: 33 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ from cpython.exc cimport (
2929
PyErr_Fetch,
3030
PyErr_Occurred,
3131
)
32+
from cpython.long cimport PyLong_FromString
3233
from cpython.object cimport PyObject
3334
from cpython.ref cimport (
3435
Py_INCREF,
@@ -1081,9 +1082,8 @@ cdef class TextReader:
10811082
np.dtype("object"), i, start, end, 0,
10821083
0, na_hashset, na_fset)
10831084
except OverflowError:
1084-
col_res, na_count = self._convert_with_dtype(
1085-
np.dtype("object"), i, start, end, na_filter,
1086-
0, na_hashset, na_fset)
1085+
col_res, na_count = _try_pylong(self.parser, i, start,
1086+
end, na_filter, na_hashset)
10871087

10881088
if col_res is not None:
10891089
break
@@ -1873,6 +1873,36 @@ cdef int _try_int64_nogil(parser_t *parser, int64_t col,
18731873

18741874
return 0
18751875

1876+
cdef _try_pylong(parser_t *parser, Py_ssize_t col,
                 int64_t line_start, int64_t line_end,
                 bint na_filter, kh_str_starts_t *na_hashset):
    """
    Convert the tokens of one column into an object array of Python ints.

    Used as the fallback when a column's integers do not fit the fixed-width
    path (the caller invokes this from its ``except OverflowError`` handler).

    Parameters
    ----------
    parser : parser_t *
        Tokenizer state holding the column's words.
    col : Py_ssize_t
        Index of the column to convert.
    line_start, line_end : int64_t
        Half-open row range; ``line_end - line_start`` values are produced.
    na_filter : bint
        When true, tokens found in ``na_hashset`` are stored as the object NA
        value instead of being parsed.
    na_hashset : kh_str_starts_t *
        Set of strings to treat as missing; only consulted if ``na_filter``.

    Returns
    -------
    tuple[ndarray[object], int]
        The converted values and the number of entries replaced by NA.

    Raises
    ------
    ValueError
        Propagated from ``PyLong_FromString`` when a token is not a valid
        base-10 integer literal.
    """
    cdef:
        int na_count = 0
        Py_ssize_t i, lines
        coliter_t it
        const char *word = NULL
        ndarray[object] result
        object py_int
        object NA = na_values[np.object_]

    lines = line_end - line_start
    result = np.empty(lines, dtype=object)
    coliter_setup(&it, parser, col, line_start)

    for i in range(lines):
        COLITER_NEXT(it, word)
        if na_filter and kh_get_str_starts_item(na_hashset, word):
            # in the hash table
            na_count += 1
            result[i] = NA
            continue

        # PyLong_FromString is cimported from cpython.long, where it is
        # declared as returning ``object``: a NULL (failed) result is turned
        # into the pending Python exception by Cython, so it can never be
        # observed as None here and no explicit error check is needed.
        py_int = PyLong_FromString(word, NULL, 10)
        result[i] = py_int

    return result, na_count
1905+
18761906

18771907
# -> tuple[ndarray[bool], int]
18781908
cdef _try_bool_flex(parser_t *parser, int64_t col,

0 commit comments

Comments
 (0)