Skip to content

Commit b3519c1

Browse files
committed
perf: improve performance marginally
Verification performance overhead reduced from 2.83 to 2.47
1 parent 6b4cedb commit b3519c1

File tree

1 file changed

+18
-8
lines changed

1 file changed

+18
-8
lines changed

pandas/_libs/parsers.pyx

Lines changed: 18 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1351,7 +1351,7 @@ cdef class TextReader:
13511351
bint na_filter, kh_str_starts_t *na_hashset):
13521352
"""Check if the column contains any float number."""
13531353
cdef:
1354-
Py_ssize_t i, lines = end - start
1354+
Py_ssize_t i, j, lines = end - start
13551355
coliter_t it
13561356
const char *word = NULL
13571357

@@ -1368,22 +1368,32 @@ cdef class TextReader:
13681368
continue
13691369

13701370
found_first_digit = False
1371-
for c in word:
1372-
if not found_first_digit and c in ignored_chars:
1373-
continue
1374-
elif not found_first_digit and c not in digits:
1371+
j = 0
1372+
while word[j] != b"\0":
1373+
if not found_first_digit and word[j] in ignored_chars:
1374+
# no-op
1375+
pass
1376+
elif not found_first_digit and word[j] not in digits:
13751377
# word isn't numeric
13761378
return False
1377-
elif not found_first_digit:
1379+
elif not found_first_digit and word[j] in digits:
13781380
found_first_digit = True
1379-
elif c in float_indicating_chars:
1381+
elif word[j] in float_indicating_chars:
13801382
# preceding chars indicates numeric and
13811383
# current char indicates float
13821384
return True
1383-
elif c not in digits:
1385+
elif word[j] not in digits:
13841386
# previous characters indicates numeric
13851387
# current character shows otherwise
13861388
return False
1389+
elif word[j] in digits:
1390+
# no-op
1391+
pass
1392+
else:
1393+
raise AssertionError(
1394+
f"Unhandled case {word[j]=} {found_first_digit=}"
1395+
)
1396+
j += 1
13871397

13881398
return False
13891399

0 commit comments

Comments (0)