Skip to content

Commit 31f26cf

Browse files
committed
fix: fix error logic by comparing pointers
1 parent 803a8bf commit 31f26cf

File tree

1 file changed

+10
-21
lines changed

1 file changed

+10
-21
lines changed

pandas/_libs/src/parser/tokenizer.c

Lines changed: 10 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -1888,33 +1888,22 @@ static int copy_string_without_char(char output[PROCESSED_WORD_CAPACITY],
18881888
const char *end_ptr = str + str_len;
18891889
size_t bytes_written = 0;
18901890

1891-
while ((right = memchr(left, char_to_remove, str_len - bytes_written)) !=
1892-
NULL) {
1893-
size_t nbytes = right - left;
1891+
while (left < end_ptr) {
1892+
right = memchr(left, char_to_remove, str_len - bytes_written);
1893+
1894+
// If it doesn't find the char to remove, just copy until EOS.
1895+
size_t chunk_size = right ? right - left : end_ptr - left;
18941896

18951897
// check if we have enough space, including the null terminator.
1896-
if (nbytes + bytes_written >= PROCESSED_WORD_CAPACITY) {
1898+
if (chunk_size + bytes_written >= PROCESSED_WORD_CAPACITY) {
18971899
return -1;
18981900
}
18991901
// copy block
1900-
memcpy(&output[bytes_written], left, nbytes);
1901-
bytes_written += nbytes;
1902-
left = right + 1;
1902+
memcpy(&output[bytes_written], left, chunk_size);
1903+
bytes_written += chunk_size;
19031904

1904-
// Exit after processing the entire string
1905-
if (left >= end_ptr) {
1906-
break;
1907-
}
1908-
}
1909-
1910-
// copy final chunk that doesn't contain char_to_remove
1911-
if (end_ptr > left) {
1912-
size_t nbytes = end_ptr - left;
1913-
if (nbytes + bytes_written >= PROCESSED_WORD_CAPACITY) {
1914-
return -1;
1915-
}
1916-
memcpy(&output[bytes_written], left, nbytes);
1917-
bytes_written += nbytes;
1905+
// Advance past the removed character if we found it.
1906+
left = right ? right + 1 : end_ptr;
19181907
}
19191908

19201909
// null terminate

0 commit comments

Comments
 (0)